From 6fa33f5dd945015d79be42c5cff146e4e2b7c4f3 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 7 Aug 2013 22:47:18 +0000 Subject: DataFlowSanitizer; LLVM changes. DataFlowSanitizer is a generalised dynamic data flow analysis. Unlike other Sanitizer tools, this tool is not designed to detect a specific class of bugs on its own. Instead, it provides a generic dynamic data flow analysis framework to be used by clients to help detect application-specific issues within their own code. Differential Revision: http://llvm-reviews.chandlerc.com/D965 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187923 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/DataFlowSanitizer.cpp | 1006 ++++++++++++++++++++ lib/Transforms/Instrumentation/Instrumentation.cpp | 1 + 3 files changed, 1008 insertions(+) create mode 100644 lib/Transforms/Instrumentation/DataFlowSanitizer.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 5e34863..65d41f5 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp BoundsChecking.cpp + DataFlowSanitizer.cpp DebugIR.cpp EdgeProfiling.cpp GCOVProfiling.cpp diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp new file mode 100644 index 0000000..1cfbba7 --- /dev/null +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -0,0 +1,1006 @@ +//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow +/// analysis. +/// +/// Unlike other Sanitizer tools, this tool is not designed to detect a specific +/// class of bugs on its own. Instead, it provides a generic dynamic data flow +/// analysis framework to be used by clients to help detect application-specific +/// issues within their own code. +/// +/// The analysis is based on automatic propagation of data flow labels (also +/// known as taint labels) through a program as it performs computation. Each +/// byte of application memory is backed by two bytes of shadow memory which +/// hold the label. On Linux/x86_64, memory is laid out as follows: +/// +/// +--------------------+ 0x800000000000 (top of memory) +/// | application memory | +/// +--------------------+ 0x700000008000 (kAppAddr) +/// | | +/// | unused | +/// | | +/// +--------------------+ 0x200200000000 (kUnusedAddr) +/// | union table | +/// +--------------------+ 0x200000000000 (kUnionTableAddr) +/// | shadow memory | +/// +--------------------+ 0x000000010000 (kShadowAddr) +/// | reserved by kernel | +/// +--------------------+ 0x000000000000 +/// +/// To derive a shadow memory address from an application memory address, +/// bits 44-46 are cleared to bring the address into the range +/// [0x000000008000,0x100000000000). Then the address is shifted left by 1 to +/// account for the double byte representation of shadow labels and move the +/// address into the shadow memory range. See the function +/// DataFlowSanitizer::getShadowAddress below. 
+/// +/// For more information, please refer to the design document: +/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html + +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include + +using namespace llvm; + +// The -dfsan-preserve-alignment flag controls whether this pass assumes that +// alignment requirements provided by the input IR are correct. For example, +// if the input IR contains a load with alignment 8, this flag will cause +// the shadow load to have alignment 16. This flag is disabled by default as +// we have unfortunately encountered too much code (including Clang itself; +// see PR14291) which performs misaligned access. +static cl::opt ClPreserveAlignment( + "dfsan-preserve-alignment", + cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, + cl::init(false)); + +// The greylist file controls how shadow parameters are passed. +// The program acts as though every function in the greylist is passed +// parameters with zero shadow and that its return value also has zero shadow. +// This avoids the use of TLS or extra function parameters to pass shadow state +// and essentially makes the function conform to the "native" (i.e. unsanitized) +// ABI. +static cl::opt ClGreylistFile( + "dfsan-greylist", + cl::desc("File containing the list of functions with a native ABI"), + cl::Hidden); + +static cl::opt ClArgsABI( + "dfsan-args-abi", + cl::desc("Use the argument ABI rather than the TLS ABI"), + cl::Hidden); + +namespace { + +class DataFlowSanitizer : public ModulePass { + friend struct DFSanFunction; + friend class DFSanVisitor; + + enum { + ShadowWidth = 16 + }; + + enum InstrumentedABI { + IA_None, + IA_MemOnly, + IA_Args, + IA_TLS + }; + + DataLayout *DL; + Module *Mod; + LLVMContext *Ctx; + IntegerType *ShadowTy; + PointerType *ShadowPtrTy; + IntegerType *IntptrTy; + ConstantInt *ZeroShadow; + ConstantInt *ShadowPtrMask; + ConstantInt *ShadowPtrMul; + Constant *ArgTLS; + Constant *RetvalTLS; + void *(*GetArgTLSPtr)(); + void *(*GetRetvalTLSPtr)(); + Constant *GetArgTLS; + Constant *GetRetvalTLS; + FunctionType *DFSanUnionFnTy; + FunctionType *DFSanUnionLoadFnTy; + Constant *DFSanUnionFn; + Constant *DFSanUnionLoadFn; + MDNode *ColdCallWeights; + SpecialCaseList Greylist; + DenseMap UnwrappedFnMap; + + Value *getShadowAddress(Value *Addr, Instruction *Pos); + Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); + FunctionType *getInstrumentedFunctionType(FunctionType *T); + InstrumentedABI getInstrumentedABI(Function *F); + InstrumentedABI getDefaultInstrumentedABI(); + +public: + DataFlowSanitizer(void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); + static char ID; + bool doInitialization(Module &M); + bool runOnModule(Module &M); +}; + +struct DFSanFunction { + DataFlowSanitizer &DFS; + Function *F; + DataFlowSanitizer::InstrumentedABI IA; + Value *ArgTLSPtr; + Value *RetvalTLSPtr; + DenseMap ValShadowMap; + DenseMap AllocaShadowMap; + std::vector > PHIFixups; + DenseSet SkipInsts; + + 
DFSanFunction(DataFlowSanitizer &DFS, Function *F) + : DFS(DFS), F(F), IA(DFS.getInstrumentedABI(F)), ArgTLSPtr(0), + RetvalTLSPtr(0) {} + Value *getArgTLSPtr(); + Value *getArgTLS(unsigned Index, Instruction *Pos); + Value *getRetvalTLS(); + Value *getShadow(Value *V); + void setShadow(Instruction *I, Value *Shadow); + Value *combineOperandShadows(Instruction *Inst); + Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align, + Instruction *Pos); + void storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow, + Instruction *Pos); +}; + +class DFSanVisitor : public InstVisitor { +public: + DFSanFunction &DFSF; + DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} + + void visitOperandShadowInst(Instruction &I); + + void visitBinaryOperator(BinaryOperator &BO); + void visitCastInst(CastInst &CI); + void visitCmpInst(CmpInst &CI); + void visitGetElementPtrInst(GetElementPtrInst &GEPI); + void visitLoadInst(LoadInst &LI); + void visitStoreInst(StoreInst &SI); + void visitReturnInst(ReturnInst &RI); + void visitCallSite(CallSite CS); + void visitPHINode(PHINode &PN); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitShuffleVectorInst(ShuffleVectorInst &I); + void visitExtractValueInst(ExtractValueInst &I); + void visitInsertValueInst(InsertValueInst &I); + void visitAllocaInst(AllocaInst &I); + void visitSelectInst(SelectInst &I); + void visitMemTransferInst(MemTransferInst &I); +}; + +} + +char DataFlowSanitizer::ID; +INITIALIZE_PASS(DataFlowSanitizer, "dfsan", + "DataFlowSanitizer: dynamic data flow analysis.", false, false) + +ModulePass *llvm::createDataFlowSanitizerPass(void *(*getArgTLS)(), + void *(*getRetValTLS)()) { + return new DataFlowSanitizer(getArgTLS, getRetValTLS); +} + +DataFlowSanitizer::DataFlowSanitizer(void *(*getArgTLS)(), + void *(*getRetValTLS)()) + : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), + Greylist(ClGreylistFile) {} + +FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { + llvm::SmallVector ArgTypes; + std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + if (T->isVarArg()) + ArgTypes.push_back(ShadowPtrTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + RetType = StructType::get(RetType, ShadowTy, (Type *)0); + return FunctionType::get(RetType, ArgTypes, T->isVarArg()); +} + +bool DataFlowSanitizer::doInitialization(Module &M) { + DL = getAnalysisIfAvailable(); + if (!DL) + return false; + + Mod = &M; + Ctx = &M.getContext(); + ShadowTy = IntegerType::get(*Ctx, ShadowWidth); + ShadowPtrTy = PointerType::getUnqual(ShadowTy); + IntptrTy = DL->getIntPtrType(*Ctx); + ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); + ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000); + ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); + + Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy }; + DFSanUnionFnTy = + FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false); + Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy }; + DFSanUnionLoadFnTy = + FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); + + if (GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = 0; + GetArgTLS = ConstantExpr::getIntToPtr( + ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), + PointerType::getUnqual( + 
FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0))); + } + if (GetRetvalTLSPtr) { + RetvalTLS = 0; + GetRetvalTLS = ConstantExpr::getIntToPtr( + ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), + PointerType::getUnqual( + FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0))); + } + + ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); + return true; +} + +DataFlowSanitizer::InstrumentedABI +DataFlowSanitizer::getInstrumentedABI(Function *F) { + if (Greylist.isIn(*F)) + return IA_MemOnly; + else + return getDefaultInstrumentedABI(); +} + +DataFlowSanitizer::InstrumentedABI +DataFlowSanitizer::getDefaultInstrumentedABI() { + return ClArgsABI ? IA_Args : IA_TLS; +} + +bool DataFlowSanitizer::runOnModule(Module &M) { + if (!DL) + return false; + + if (!GetArgTLSPtr) { + Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); + ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); + if (GlobalVariable *G = dyn_cast(ArgTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + if (!GetRetvalTLSPtr) { + RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy); + if (GlobalVariable *G = dyn_cast(RetvalTLS)) + G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); + } + + DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); + if (Function *F = dyn_cast(DFSanUnionFn)) { + F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); + F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + F->addAttribute(1, Attribute::ZExt); + F->addAttribute(2, Attribute::ZExt); + } + DFSanUnionLoadFn = + Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); + if (Function *F = dyn_cast(DFSanUnionLoadFn)) { + F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + } + + std::vector FnsToInstrument; + for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { + if (!i->isIntrinsic() && i != DFSanUnionFn && i != DFSanUnionLoadFn) + FnsToInstrument.push_back(&*i); + } + + // First, change the ABI of every function in the module. Greylisted + // functions keep their original ABI and get a wrapper function. + for (std::vector::iterator i = FnsToInstrument.begin(), + e = FnsToInstrument.end(); + i != e; ++i) { + Function &F = **i; + + FunctionType *FT = F.getFunctionType(); + FunctionType *NewFT = getInstrumentedFunctionType(FT); + // If the function types are the same (i.e. void()), we don't need to do + // anything here. 
+ if (FT != NewFT) { + switch (getInstrumentedABI(&F)) { + case IA_Args: { + Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); + NewF->setCallingConv(F.getCallingConv()); + NewF->setAttributes(F.getAttributes().removeAttributes( + *Ctx, AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex))); + for (Function::arg_iterator FArg = F.arg_begin(), + NewFArg = NewF->arg_begin(), + FArgEnd = F.arg_end(); + FArg != FArgEnd; ++FArg, ++NewFArg) { + FArg->replaceAllUsesWith(NewFArg); + } + NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); + + for (Function::use_iterator ui = F.use_begin(), ue = F.use_end(); + ui != ue;) { + BlockAddress *BA = dyn_cast(ui.getUse().getUser()); + ++ui; + if (BA) { + BA->replaceAllUsesWith( + BlockAddress::get(NewF, BA->getBasicBlock())); + delete BA; + } + } + F.replaceAllUsesWith( + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT))); + NewF->takeName(&F); + F.eraseFromParent(); + *i = NewF; + break; + } + case IA_MemOnly: { + assert(!FT->isVarArg() && "varargs not handled here yet"); + assert(getDefaultInstrumentedABI() == IA_Args); + Function *NewF = + Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage, + std::string("dfsw$") + F.getName(), &M); + NewF->setCallingConv(F.getCallingConv()); + NewF->setAttributes(F.getAttributes()); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); + std::vector Args; + unsigned n = FT->getNumParams(); + for (Function::arg_iterator i = NewF->arg_begin(); n != 0; ++i, --n) + Args.push_back(&*i); + CallInst *CI = CallInst::Create(&F, Args, "", BB); + if (FT->getReturnType()->isVoidTy()) + ReturnInst::Create(*Ctx, BB); + else { + Value *InsVal = InsertValueInst::Create( + UndefValue::get(NewFT->getReturnType()), CI, 0, "", BB); + Value *InsShadow = + InsertValueInst::Create(InsVal, ZeroShadow, 1, "", BB); + ReturnInst::Create(*Ctx, InsShadow, BB); + } + + Value *WrappedFnCst = + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); + F.replaceAllUsesWith(WrappedFnCst); + UnwrappedFnMap[WrappedFnCst] = &F; + break; + } + default: + break; + } + } + } + + for (std::vector::iterator i = FnsToInstrument.begin(), + e = FnsToInstrument.end(); + i != e; ++i) { + if ((*i)->isDeclaration()) + continue; + + DFSanFunction DFSF(*this, *i); + + // DFSanVisitor may create new basic blocks, which confuses df_iterator. + // Build a copy of the list before iterating over it. + llvm::SmallVector BBList; + std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()), + std::back_inserter(BBList)); + + for (llvm::SmallVector::iterator i = BBList.begin(), + e = BBList.end(); + i != e; ++i) { + Instruction *Inst = &(*i)->front(); + while (1) { + // DFSanVisitor may split the current basic block, changing the current + // instruction's next pointer and moving the next instruction to the + // tail block from which we should continue. 
+ Instruction *Next = Inst->getNextNode(); + if (!DFSF.SkipInsts.count(Inst)) + DFSanVisitor(DFSF).visit(Inst); + if (isa(Inst)) + break; + Inst = Next; + } + } + + for (std::vector >::iterator + i = DFSF.PHIFixups.begin(), + e = DFSF.PHIFixups.end(); + i != e; ++i) { + for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n; + ++val) { + i->second->setIncomingValue( + val, DFSF.getShadow(i->first->getIncomingValue(val))); + } + } + } + + return false; +} + +Value *DFSanFunction::getArgTLSPtr() { + if (ArgTLSPtr) + return ArgTLSPtr; + if (DFS.ArgTLS) + return ArgTLSPtr = DFS.ArgTLS; + + IRBuilder<> IRB(F->getEntryBlock().begin()); + return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS); +} + +Value *DFSanFunction::getRetvalTLS() { + if (RetvalTLSPtr) + return RetvalTLSPtr; + if (DFS.RetvalTLS) + return RetvalTLSPtr = DFS.RetvalTLS; + + IRBuilder<> IRB(F->getEntryBlock().begin()); + return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS); +} + +Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) { + IRBuilder<> IRB(Pos); + return IRB.CreateConstGEP2_64(getArgTLSPtr(), 0, Idx); +} + +Value *DFSanFunction::getShadow(Value *V) { + if (!isa(V) && !isa(V)) + return DFS.ZeroShadow; + Value *&Shadow = ValShadowMap[V]; + if (!Shadow) { + if (Argument *A = dyn_cast(V)) { + switch (IA) { + case DataFlowSanitizer::IA_TLS: { + Value *ArgTLSPtr = getArgTLSPtr(); + Instruction *ArgTLSPos = + DFS.ArgTLS ? &*F->getEntryBlock().begin() + : cast(ArgTLSPtr)->getNextNode(); + IRBuilder<> IRB(ArgTLSPos); + Shadow = IRB.CreateLoad(getArgTLS(A->getArgNo(), ArgTLSPos)); + break; + } + case DataFlowSanitizer::IA_Args: { + unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2; + Function::arg_iterator i = F->arg_begin(); + while (ArgIdx--) + ++i; + Shadow = i; + break; + } + default: + Shadow = DFS.ZeroShadow; + break; + } + } else { + Shadow = DFS.ZeroShadow; + } + } + return Shadow; +} + +void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { + assert(!ValShadowMap.count(I)); + assert(Shadow->getType() == DFS.ShadowTy); + ValShadowMap[I] = Shadow; +} + +Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { + assert(Addr != RetvalTLS && "Reinstrumenting?"); + IRBuilder<> IRB(Pos); + return IRB.CreateIntToPtr( + IRB.CreateMul( + IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask), + ShadowPtrMul), + ShadowPtrTy); +} + +// Generates IR to compute the union of the two given shadows, inserting it +// before Pos. Returns the computed union Value. 
+Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2, + Instruction *Pos) { + if (V1 == ZeroShadow) + return V2; + if (V2 == ZeroShadow) + return V1; + if (V1 == V2) + return V1; + IRBuilder<> IRB(Pos); + BasicBlock *Head = Pos->getParent(); + Value *Ne = IRB.CreateICmpNE(V1, V2); + Instruction *NeInst = dyn_cast(Ne); + if (NeInst) { + BranchInst *BI = cast(SplitBlockAndInsertIfThen( + NeInst, /*Unreachable=*/ false, ColdCallWeights)); + IRBuilder<> ThenIRB(BI); + CallInst *Call = ThenIRB.CreateCall2(DFSanUnionFn, V1, V2); + Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + Call->addAttribute(1, Attribute::ZExt); + Call->addAttribute(2, Attribute::ZExt); + + BasicBlock *Tail = BI->getSuccessor(0); + PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin()); + Phi->addIncoming(Call, Call->getParent()); + Phi->addIncoming(ZeroShadow, Head); + Pos = Phi; + return Phi; + } else { + assert(0 && "todo"); + return 0; + } +} + +// A convenience function which folds the shadows of each of the operands +// of the provided instruction Inst, inserting the IR before Inst. Returns +// the computed union Value. +Value *DFSanFunction::combineOperandShadows(Instruction *Inst) { + if (Inst->getNumOperands() == 0) + return DFS.ZeroShadow; + + Value *Shadow = getShadow(Inst->getOperand(0)); + for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) { + Shadow = DFS.combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst); + } + return Shadow; +} + +void DFSanVisitor::visitOperandShadowInst(Instruction &I) { + Value *CombinedShadow = DFSF.combineOperandShadows(&I); + DFSF.setShadow(&I, CombinedShadow); +} + +// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where +// Addr has alignment Align, and take the union of each of those shadows. +Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, + Instruction *Pos) { + if (AllocaInst *AI = dyn_cast(Addr)) { + llvm::DenseMap::iterator i = + AllocaShadowMap.find(AI); + if (i != AllocaShadowMap.end()) { + IRBuilder<> IRB(Pos); + return IRB.CreateLoad(i->second); + } + } + + uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + SmallVector Objs; + GetUnderlyingObjects(Addr, Objs, DFS.DL); + bool AllConstants = true; + for (SmallVector::iterator i = Objs.begin(), e = Objs.end(); + i != e; ++i) { + if (isa(*i) || isa(*i)) + continue; + if (isa(*i) && cast(*i)->isConstant()) + continue; + + AllConstants = false; + break; + } + if (AllConstants) + return DFS.ZeroShadow; + + Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); + switch (Size) { + case 0: + return DFS.ZeroShadow; + case 1: { + LoadInst *LI = new LoadInst(ShadowAddr, "", Pos); + LI->setAlignment(ShadowAlign); + return LI; + } + case 2: { + IRBuilder<> IRB(Pos); + Value *ShadowAddr1 = + IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); + return DFS.combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign), + IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), + Pos); + } + } + if (Size % (64 / DFS.ShadowWidth) == 0) { + // Fast path for the common case where each byte has identical shadow: load + // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any + // shadow is non-equal. 
+ BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F); + IRBuilder<> FallbackIRB(FallbackBB); + CallInst *FallbackCall = FallbackIRB.CreateCall2( + DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)); + FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + // Compare each of the shadows stored in the loaded 64 bits to each other, + // by computing (WideShadow rotl ShadowWidth) == WideShadow. + IRBuilder<> IRB(Pos); + Value *WideAddr = + IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx)); + Value *WideShadow = IRB.CreateAlignedLoad(WideAddr, ShadowAlign); + Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy); + Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidth); + Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidth); + Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow); + Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); + + BasicBlock *Head = Pos->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(Pos); + // In the following code LastBr will refer to the previous basic block's + // conditional branch instruction, whose true successor is fixed up to point + // to the next block during the loop below or to the tail after the final + // iteration. + BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq); + ReplaceInstWithInst(Head->getTerminator(), LastBr); + + for (uint64_t Ofs = 64 / DFS.ShadowWidth; Ofs != Size; + Ofs += 64 / DFS.ShadowWidth) { + BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); + IRBuilder<> NextIRB(NextBB); + WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); + Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign); + ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); + LastBr->setSuccessor(0, NextBB); + LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB); + } + + LastBr->setSuccessor(0, Tail); + FallbackIRB.CreateBr(Tail); + PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); + Shadow->addIncoming(FallbackCall, FallbackBB); + Shadow->addIncoming(TruncShadow, LastBr->getParent()); + return Shadow; + } + + IRBuilder<> IRB(Pos); + CallInst *FallbackCall = IRB.CreateCall2( + DFS.DFSanUnionLoadFn, ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)); + FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + return FallbackCall; +} + +void DFSanVisitor::visitLoadInst(LoadInst &LI) { + uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType()); + uint64_t Align; + if (ClPreserveAlignment) { + Align = LI.getAlignment(); + if (Align == 0) + Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType()); + } else { + Align = 1; + } + IRBuilder<> IRB(&LI); + Value *LoadedShadow = + DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); + Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); + DFSF.setShadow(&LI, DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI)); +} + +void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, + Value *Shadow, Instruction *Pos) { + if (AllocaInst *AI = dyn_cast(Addr)) { + llvm::DenseMap::iterator i = + AllocaShadowMap.find(AI); + if (i != AllocaShadowMap.end()) { + IRBuilder<> IRB(Pos); + IRB.CreateStore(Shadow, i->second); + return; + } + } + + uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8; + IRBuilder<> IRB(Pos); + Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos); + if (Shadow == DFS.ZeroShadow) { + IntegerType *ShadowTy = IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidth); 
+ Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0); + Value *ExtShadowAddr = + IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy)); + IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign); + return; + } + + const unsigned ShadowVecSize = 128 / DFS.ShadowWidth; + uint64_t Offset = 0; + if (Size >= ShadowVecSize) { + VectorType *ShadowVecTy = VectorType::get(DFS.ShadowTy, ShadowVecSize); + Value *ShadowVec = UndefValue::get(ShadowVecTy); + for (unsigned i = 0; i != ShadowVecSize; ++i) { + ShadowVec = IRB.CreateInsertElement( + ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i)); + } + Value *ShadowVecAddr = + IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); + do { + Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); + IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); + Size -= ShadowVecSize; + ++Offset; + } while (Size >= ShadowVecSize); + Offset *= ShadowVecSize; + } + while (Size > 0) { + Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); + IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign); + --Size; + ++Offset; + } +} + +void DFSanVisitor::visitStoreInst(StoreInst &SI) { + uint64_t Size = + DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType()); + uint64_t Align; + if (ClPreserveAlignment) { + Align = SI.getAlignment(); + if (Align == 0) + Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType()); + } else { + Align = 1; + } + DFSF.storeShadow(SI.getPointerOperand(), Size, Align, + DFSF.getShadow(SI.getValueOperand()), &SI); +} + +void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) { + visitOperandShadowInst(BO); +} + +void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); } + +void DFSanVisitor::visitCmpInst(CmpInst &CI) { visitOperandShadowInst(CI); } + +void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + visitOperandShadowInst(GEPI); +} + +void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) { + visitOperandShadowInst(I); +} + +void DFSanVisitor::visitAllocaInst(AllocaInst &I) { + bool AllLoadsStores = true; + for (Instruction::use_iterator i = I.use_begin(), e = I.use_end(); i != e; + ++i) { + if (isa(*i)) + continue; + + if (StoreInst *SI = dyn_cast(*i)) { + if (SI->getPointerOperand() == &I) + continue; + } + + AllLoadsStores = false; + break; + } + if (AllLoadsStores) { + IRBuilder<> IRB(&I); + DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy); + } + DFSF.setShadow(&I, DFSF.DFS.ZeroShadow); +} + +void DFSanVisitor::visitSelectInst(SelectInst &I) { + Value *CondShadow = DFSF.getShadow(I.getCondition()); + Value *TrueShadow = DFSF.getShadow(I.getTrueValue()); + Value *FalseShadow = DFSF.getShadow(I.getFalseValue()); + + if (isa(I.getCondition()->getType())) { + DFSF.setShadow( + &I, DFSF.DFS.combineShadows( + CondShadow, + DFSF.DFS.combineShadows(TrueShadow, FalseShadow, &I), &I)); + } else { + Value *ShadowSel; + if (TrueShadow == FalseShadow) { + ShadowSel = TrueShadow; + } else { + ShadowSel = + SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I); + } + 
DFSF.setShadow(&I, DFSF.DFS.combineShadows(CondShadow, ShadowSel, &I)); + } +} + +void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { + IRBuilder<> IRB(&I); + Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); + Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I); + Value *LenShadow = IRB.CreateMul( + I.getLength(), + ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); + Value *AlignShadow; + if (ClPreserveAlignment) { + AlignShadow = IRB.CreateMul(I.getAlignmentCst(), + ConstantInt::get(I.getAlignmentCst()->getType(), + DFSF.DFS.ShadowWidth / 8)); + } else { + AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(), + DFSF.DFS.ShadowWidth / 8); + } + Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); + DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); + SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); + IRB.CreateCall5(I.getCalledValue(), DestShadow, SrcShadow, LenShadow, + AlignShadow, I.getVolatileCst()); +} + +void DFSanVisitor::visitReturnInst(ReturnInst &RI) { + if (RI.getReturnValue()) { + switch (DFSF.IA) { + case DataFlowSanitizer::IA_TLS: { + Value *S = DFSF.getShadow(RI.getReturnValue()); + IRBuilder<> IRB(&RI); + IRB.CreateStore(S, DFSF.getRetvalTLS()); + break; + } + case DataFlowSanitizer::IA_Args: { + IRBuilder<> IRB(&RI); + Type *RT = DFSF.F->getFunctionType()->getReturnType(); + Value *InsVal = + IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0); + Value *InsShadow = + IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1); + RI.setOperand(0, InsShadow); + break; + } + default: + break; + } + } +} + +void DFSanVisitor::visitCallSite(CallSite CS) { + Function *F = CS.getCalledFunction(); + if ((F && F->isIntrinsic()) || isa(CS.getCalledValue())) { + visitOperandShadowInst(*CS.getInstruction()); + return; + } + + DenseMap::iterator i = + DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue()); + if (i != DFSF.DFS.UnwrappedFnMap.end()) { + CS.setCalledFunction(i->second); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + + IRBuilder<> IRB(CS.getInstruction()); + + FunctionType *FT = cast( + CS.getCalledValue()->getType()->getPointerElementType()); + if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { + IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)), + DFSF.getArgTLS(i, CS.getInstruction())); + } + } + + Instruction *Next = 0; + if (!CS.getType()->isVoidTy()) { + if (InvokeInst *II = dyn_cast(CS.getInstruction())) { + if (II->getNormalDest()->getSinglePredecessor()) { + Next = II->getNormalDest()->begin(); + } else { + BasicBlock *NewBB = + SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DFS); + Next = NewBB->begin(); + } + } else { + Next = CS->getNextNode(); + } + + if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + IRBuilder<> NextIRB(Next); + LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS()); + DFSF.SkipInsts.insert(LI); + DFSF.setShadow(CS.getInstruction(), LI); + } + } + + // Do all instrumentation for IA_Args down here to defer tampering with the + // CFG in a way that SplitEdge may be able to detect. 
+ if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_Args) { + FunctionType *NewFT = DFSF.DFS.getInstrumentedFunctionType(FT); + Value *Func = + IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT)); + std::vector Args; + + CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(*i); + + i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getShadow(*i)); + + if (FT->isVarArg()) { + unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); + ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); + AllocaInst *VarArgShadow = + new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); + Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); + for (unsigned n = 0; i != e; ++i, ++n) { + IRB.CreateStore(DFSF.getShadow(*i), + IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); + Args.push_back(*i); + } + } + + CallSite NewCS; + if (InvokeInst *II = dyn_cast(CS.getInstruction())) { + NewCS = IRB.CreateInvoke(Func, II->getNormalDest(), II->getUnwindDest(), + Args); + } else { + NewCS = IRB.CreateCall(Func, Args); + } + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes(CS.getAttributes().removeAttributes( + *DFSF.DFS.Ctx, AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType(), + AttributeSet::ReturnIndex))); + + if (Next) { + ExtractValueInst *ExVal = + ExtractValueInst::Create(NewCS.getInstruction(), 0, "", Next); + DFSF.SkipInsts.insert(ExVal); + ExtractValueInst *ExShadow = + ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next); + DFSF.SkipInsts.insert(ExShadow); + DFSF.setShadow(ExVal, ExShadow); + + CS.getInstruction()->replaceAllUsesWith(ExVal); + } + + CS.getInstruction()->eraseFromParent(); + } +} + +void DFSanVisitor::visitPHINode(PHINode &PN) { + PHINode *ShadowPN = + PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN); + + // Give the shadow phi node valid predecessors to fool SplitEdge into working. + Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy); + for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e; + ++i) { + ShadowPN->addIncoming(UndefShadow, *i); + } + + DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN)); + DFSF.setShadow(&PN, ShadowPN); +} diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 9f35396..94f7901 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -30,6 +30,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializePathProfilerPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); + initializeDataFlowSanitizerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for -- cgit v1.1 From 41418d17cced656f91038b2482bc9d173b4974b0 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 7 Aug 2013 22:49:12 +0000 Subject: Add ISD::FROUND for libm round() All libm floating-point rounding functions, except for round(), had their own ISD nodes. Recent PowerPC cores have an instruction for round(), and so here I'm adding ISD::FROUND so that round() can be custom lowered as well. For the most part, this is straightforward. I've added an intrinsic and a matching ISD node just like those for nearbyint() and friends. 
The SelectionDAG pattern I've named frnd (because ISD::FP_ROUND has already claimed fround). This will be used by the PowerPC backend in a follow-up commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a62fedc..e452acd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1772,6 +1772,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: + case Intrinsic::round: case Intrinsic::pow: case Intrinsic::fma: case Intrinsic::fmuladd: @@ -1850,6 +1851,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case LibFunc::nearbyintf: case LibFunc::nearbyintl: return Intrinsic::nearbyint; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + return Intrinsic::round; case LibFunc::pow: case LibFunc::powf: case LibFunc::powl: -- cgit v1.1 From b0fd15f645a05480467136f94d5e5baacd1905a9 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 7 Aug 2013 23:56:34 +0000 Subject: [objc-arc] Change 4 iterator methods which return const_iterators to be const methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187940 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 6f94a7c..6d4ff65 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -756,10 +756,10 @@ namespace { // Specialized CFG utilities. typedef SmallVectorImpl::const_iterator edge_iterator; - edge_iterator pred_begin() { return Preds.begin(); } - edge_iterator pred_end() { return Preds.end(); } - edge_iterator succ_begin() { return Succs.begin(); } - edge_iterator succ_end() { return Succs.end(); } + edge_iterator pred_begin() const { return Preds.begin(); } + edge_iterator pred_end() const { return Preds.end(); } + edge_iterator succ_begin() const { return Succs.begin(); } + edge_iterator succ_end() const { return Succs.end(); } void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } -- cgit v1.1 From 7f1a7d4137ce535558480f8e044238f35a8654e6 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 7 Aug 2013 23:56:41 +0000 Subject: [objc-arc] Track if we encountered an additive overflow while computing {TopDown,BottomUp}PathCounts and do nothing if it occured. rdar://14590914 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187941 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 35 +++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 6d4ff65..582f7ea 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -648,6 +648,8 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { namespace { /// \brief Per-BasicBlock state. 
class BBState { + static const unsigned OverflowOccurredValue; + /// The number of unique control paths from the entry which can reach this /// block. unsigned TopDownPathCount; @@ -674,7 +676,7 @@ namespace { SmallVector Succs; public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } typedef MapTy::iterator ptr_iterator; typedef MapTy::const_iterator ptr_const_iterator; @@ -745,8 +747,9 @@ namespace { /// Returns true if overflow occured. Returns false if overflow did not /// occur. bool GetAllPathCountWithOverflow(unsigned &PathCount) const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); + if (TopDownPathCount == OverflowOccurredValue || + BottomUpPathCount == OverflowOccurredValue) + return false; unsigned long long Product = (unsigned long long)TopDownPathCount*BottomUpPathCount; PathCount = Product; @@ -766,6 +769,8 @@ namespace { bool isExit() const { return Succs.empty(); } }; + + const unsigned BBState::OverflowOccurredValue = -1; } void BBState::InitFromPred(const BBState &Other) { @@ -781,13 +786,25 @@ void BBState::InitFromSucc(const BBState &Other) { /// The top-down traversal uses this to merge information about predecessors to /// form the initial state for a new block. void BBState::MergePred(const BBState &Other) { + if (TopDownPathCount == OverflowOccurredValue) + return; + // Other.TopDownPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. TopDownPathCount += Other.TopDownPathCount; + // In order to be consistent, we clear the top down pointers when by adding + // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow + // has not occured. + if (TopDownPathCount == OverflowOccurredValue) { + clearTopDownPointers(); + return; + } + // Check for overflow. If we have overflow, fall back to conservative // behavior. if (TopDownPathCount < Other.TopDownPathCount) { + TopDownPathCount = OverflowOccurredValue; clearTopDownPointers(); return; } @@ -813,13 +830,25 @@ void BBState::MergePred(const BBState &Other) { /// The bottom-up traversal uses this to merge information about successors to /// form the initial state for a new block. void BBState::MergeSucc(const BBState &Other) { + if (BottomUpPathCount == OverflowOccurredValue) + return; + // Other.BottomUpPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. BottomUpPathCount += Other.BottomUpPathCount; + // In order to be consistent, we clear the top down pointers when by adding + // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow + // has not occured. + if (BottomUpPathCount == OverflowOccurredValue) { + clearBottomUpPointers(); + return; + } + // Check for overflow. If we have overflow, fall back to conservative // behavior. if (BottomUpPathCount < Other.BottomUpPathCount) { + BottomUpPathCount = OverflowOccurredValue; clearBottomUpPointers(); return; } -- cgit v1.1 From 46c72c74cfebb27a39296b292a8fc9b75d66f665 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 8 Aug 2013 00:15:27 +0000 Subject: Fix ARM build. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187944 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 1cfbba7..5e6313a 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -236,7 +236,7 @@ bool DataFlowSanitizer::doInitialization(Module &M) { ShadowPtrTy = PointerType::getUnqual(ShadowTy); IntptrTy = DL->getIntPtrType(*Ctx); ZeroShadow = ConstantInt::getSigned(ShadowTy, 0); - ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000); + ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8); Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy }; -- cgit v1.1 From 35b6edfbeaf0f82954e0b308e1c6693e3d39a9e1 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Thu, 8 Aug 2013 00:41:18 +0000 Subject: Revert "[objc-arc] Track if we encountered an additive overflow while computing {TopDown,BottomUp}PathCounts and do nothing if it occured." This reverts commit r187941. The commit was passing on my os x box, but it is failing on some non-osx platforms. I do not have time to look into it now, so I am reverting and will recommit after I figure this out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187946 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 35 +++------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 582f7ea..6d4ff65 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -648,8 +648,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { namespace { /// \brief Per-BasicBlock state. class BBState { - static const unsigned OverflowOccurredValue; - /// The number of unique control paths from the entry which can reach this /// block. unsigned TopDownPathCount; @@ -676,7 +674,7 @@ namespace { SmallVector Succs; public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} typedef MapTy::iterator ptr_iterator; typedef MapTy::const_iterator ptr_const_iterator; @@ -747,9 +745,8 @@ namespace { /// Returns true if overflow occured. Returns false if overflow did not /// occur. bool GetAllPathCountWithOverflow(unsigned &PathCount) const { - if (TopDownPathCount == OverflowOccurredValue || - BottomUpPathCount == OverflowOccurredValue) - return false; + assert(TopDownPathCount != 0); + assert(BottomUpPathCount != 0); unsigned long long Product = (unsigned long long)TopDownPathCount*BottomUpPathCount; PathCount = Product; @@ -769,8 +766,6 @@ namespace { bool isExit() const { return Succs.empty(); } }; - - const unsigned BBState::OverflowOccurredValue = -1; } void BBState::InitFromPred(const BBState &Other) { @@ -786,25 +781,13 @@ void BBState::InitFromSucc(const BBState &Other) { /// The top-down traversal uses this to merge information about predecessors to /// form the initial state for a new block. 
void BBState::MergePred(const BBState &Other) { - if (TopDownPathCount == OverflowOccurredValue) - return; - // Other.TopDownPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. TopDownPathCount += Other.TopDownPathCount; - // In order to be consistent, we clear the top down pointers when by adding - // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow - // has not occured. - if (TopDownPathCount == OverflowOccurredValue) { - clearTopDownPointers(); - return; - } - // Check for overflow. If we have overflow, fall back to conservative // behavior. if (TopDownPathCount < Other.TopDownPathCount) { - TopDownPathCount = OverflowOccurredValue; clearTopDownPointers(); return; } @@ -830,25 +813,13 @@ void BBState::MergePred(const BBState &Other) { /// The bottom-up traversal uses this to merge information about successors to /// form the initial state for a new block. void BBState::MergeSucc(const BBState &Other) { - if (BottomUpPathCount == OverflowOccurredValue) - return; - // Other.BottomUpPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. BottomUpPathCount += Other.BottomUpPathCount; - // In order to be consistent, we clear the top down pointers when by adding - // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow - // has not occured. - if (BottomUpPathCount == OverflowOccurredValue) { - clearBottomUpPointers(); - return; - } - // Check for overflow. If we have overflow, fall back to conservative // behavior. if (BottomUpPathCount < Other.BottomUpPathCount) { - BottomUpPathCount = OverflowOccurredValue; clearBottomUpPointers(); return; } -- cgit v1.1 From 4c71064129d1e5def34d74ee47c4f3beaa0a66df Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Fri, 9 Aug 2013 20:53:48 +0000 Subject: Mark obviously const methods. Also use reference for parameters when possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188103 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index d77e20b..7556522 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -488,7 +488,7 @@ struct FunctionStackPoisoner : public InstVisitor { void initializeCallbacks(Module &M); // Check if we want (and can) handle this alloca. 
- bool isInterestingAlloca(AllocaInst &AI) { + bool isInterestingAlloca(AllocaInst &AI) const { return (!AI.isArrayAllocation() && AI.isStaticAlloca() && AI.getAlignment() <= RedzoneSize() && @@ -498,24 +498,24 @@ struct FunctionStackPoisoner : public InstVisitor { size_t RedzoneSize() const { return RedzoneSizeForScale(Mapping.Scale); } - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { + uint64_t getAllocaSizeInBytes(AllocaInst *AI) const { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty); return SizeInBytes; } - uint64_t getAlignedSize(uint64_t SizeInBytes) { + uint64_t getAlignedSize(uint64_t SizeInBytes) const { size_t RZ = RedzoneSize(); return ((SizeInBytes + RZ - 1) / RZ) * RZ; } - uint64_t getAlignedAllocaSize(AllocaInst *AI) { + uint64_t getAlignedAllocaSize(AllocaInst *AI) const { uint64_t SizeInBytes = getAllocaSizeInBytes(AI); return getAlignedSize(SizeInBytes); } /// Finds alloca where the value comes from. AllocaInst *findAllocaForValue(Value *V); - void poisonRedZones(const ArrayRef &AllocaVec, IRBuilder<> IRB, + void poisonRedZones(const ArrayRef &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase, bool DoPoison); - void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison); + void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison); }; } // namespace @@ -1283,7 +1283,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { } void FunctionStackPoisoner::poisonRedZones( - const ArrayRef &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, + const ArrayRef &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase, bool DoPoison) { size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale; assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); @@ -1460,7 +1460,7 @@ void FunctionStackPoisoner::poisonStack() { } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, - IRBuilder<> IRB, bool DoPoison) { + IRBuilder<> &IRB, bool DoPoison) { // For now just insert the call to ASan runtime. Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy); Value *SizeArg = ConstantInt::get(IntptrTy, Size); -- cgit v1.1 From aaae6e9cb8c8567409739d7cde1c5caaf35810cf Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 9 Aug 2013 21:42:53 +0000 Subject: DataFlowSanitizer: Remove unreachable BBs so IR continues to verify under the args ABI. Differential Revision: http://llvm-reviews.chandlerc.com/D1316 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 5e6313a..0bbbfef 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -59,6 +59,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SpecialCaseList.h" #include @@ -402,6 +403,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if ((*i)->isDeclaration()) continue; + removeUnreachableBlocks(**i); + DFSanFunction DFSF(*this, *i); // DFSanVisitor may create new basic blocks, which confuses df_iterator. 
-- cgit v1.1 From 835738ce54edb4746cfa4cdfc76608fec03fab3a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 9 Aug 2013 22:47:24 +0000 Subject: Kill some duplicated code for removing unreachable BBs. This moves removeUnreachableBlocksFromFn from SimplifyCFGPass.cpp to Utils/Local.cpp and uses it to replace the implementation of llvm::removeUnreachableBlocks, which appears to do a strict subset of what removeUnreachableBlocksFromFn does. Differential Revision: http://llvm-reviews.chandlerc.com/D1334 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188119 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 165 +----------------------------- lib/Transforms/Utils/Local.cpp | 147 +++++++++++++++++++++++--- 2 files changed, 140 insertions(+), 172 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 6d05640..8371f6d 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -66,161 +66,6 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. -static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { - BasicBlock *BB = I->getParent(); - // Loop over all of the successors, removing BB's entry from any PHI - // nodes. - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - (*SI)->removePredecessor(BB); - - // Insert a call to llvm.trap right before this. This turns the undefined - // behavior into a hard fail instead of falling through into random code. - if (UseLLVMTrap) { - Function *TrapFn = - Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst *CallTrap = CallInst::Create(TrapFn, "", I); - CallTrap->setDebugLoc(I->getDebugLoc()); - } - new UnreachableInst(I->getContext(), I); - - // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); - while (BBI != BBE) { - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); - BB->getInstList().erase(BBI++); - } -} - -/// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Follow the call by a branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); - - // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); -} - -static bool markAliveBlocks(BasicBlock *BB, - SmallPtrSet &Reachable) { - - SmallVector Worklist; - Worklist.push_back(BB); - Reachable.insert(BB); - bool Changed = false; - do { - BB = Worklist.pop_back_val(); - - // Do a quick scan of the basic block, turning any obviously unreachable - // instructions into LLVM unreachable insts. The instruction combining pass - // canonicalizes unreachable insts into stores to null or undef. 
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ - if (CallInst *CI = dyn_cast(BBI)) { - if (CI->doesNotReturn()) { - // If we found a call to a no-return function, insert an unreachable - // instruction after it. Make sure there isn't *already* one there - // though. - ++BBI; - if (!isa(BBI)) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); - Changed = true; - } - break; - } - } - - // Store to undef and store to null are undefined and used to signal that - // they should be changed to unreachable by passes that can't modify the - // CFG. - if (StoreInst *SI = dyn_cast(BBI)) { - // Don't touch volatile stores. - if (SI->isVolatile()) continue; - - Value *Ptr = SI->getOperand(1); - - if (isa(Ptr) || - (isa(Ptr) && - SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); - Changed = true; - break; - } - } - } - - // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - Value *Callee = II->getCalledValue(); - if (isa(Callee) || isa(Callee)) { - changeToUnreachable(II, true); - Changed = true; - } else if (II->doesNotThrow()) { - if (II->use_empty() && II->onlyReadsMemory()) { - // jump to the normal destination branch. - BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); - } else - changeToCall(II); - Changed = true; - } - } - - Changed |= ConstantFoldTerminator(BB, true); - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (Reachable.insert(*SI)) - Worklist.push_back(*SI); - } while (!Worklist.empty()); - return Changed; -} - -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even -/// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. -static bool removeUnreachableBlocksFromFn(Function &F) { - SmallPtrSet Reachable; - bool Changed = markAliveBlocks(F.begin(), Reachable); - - // If there are unreachable blocks in the CFG... - if (Reachable.size() == F.size()) - return Changed; - - assert(Reachable.size() < F.size()); - NumSimpl += F.size()-Reachable.size(); - - // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... - for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) - continue; - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); - BB->dropAllReferences(); - } - - for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(I)) - I = F.getBasicBlockList().erase(I); - else - ++I; - - return true; -} - /// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. 
static bool mergeEmptyReturnBlocks(Function &F) { @@ -325,7 +170,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis(); const DataLayout *TD = getAnalysisIfAvailable(); - bool EverChanged = removeUnreachableBlocksFromFn(F); + bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, TD); @@ -333,16 +178,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { if (!EverChanged) return false; // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, - // removeUnreachableBlocksFromFn is needed to nuke them, which means we should + // removeUnreachableBlocks is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to // avoid reruning iterativelySimplifyCFG if the second pass of - // removeUnreachableBlocksFromFn doesn't do anything. - if (!removeUnreachableBlocksFromFn(F)) + // removeUnreachableBlocks doesn't do anything. + if (!removeUnreachableBlocks(F)) return true; do { EverChanged = iterativelySimplifyCFG(F, TTI, TD); - EverChanged |= removeUnreachableBlocksFromFn(F); + EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 08e1808..4db3a72 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -43,6 +44,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -1121,33 +1124,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, return true; } -bool llvm::removeUnreachableBlocks(Function &F) { - SmallPtrSet Reachable; +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. 
+static void changeToCall(InvokeInst *II) { + SmallVector Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet &Reachable) { + SmallVector Worklist; - Worklist.push_back(&F.getEntryBlock()); - Reachable.insert(&F.getEntryBlock()); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; do { - BasicBlock *BB = Worklist.pop_back_val(); + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa(Ptr) || + (isa(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa(Callee) || isa(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.insert(*SI)) Worklist.push_back(*SI); } while (!Worklist.empty()); + return Changed; +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + // If there are unreachable blocks in the CFG... 
if (Reachable.size() == F.size()) - return false; + return Changed; assert(Reachable.size() < F.size()); - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { - if (Reachable.count(I)) + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... + for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) continue; - for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(I); - I->dropAllReferences(); + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); } - for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + for (Function::iterator I = ++F.begin(); I != F.end();) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else -- cgit v1.1 From 3ed7576a3bfd8e4e76e7b8f7ae76422c1ae48f72 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 9 Aug 2013 23:22:27 +0000 Subject: [objc-arc] Track if we encountered an additive overflow while computing {TopDown,BottomUp}PathCounts and do nothing if it occurred. I fixed the aforementioned problems that came up on some of the linux boxes. Major thanks to Nick Lewycky for his help debugging! rdar://14590914 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 61 +++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 6d4ff65..0385de5 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -674,7 +674,9 @@ namespace { SmallVector Succs; public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + static const unsigned OverflowOccurredValue; + + BBState() : TopDownPathCount(0), BottomUpPathCount(0) { } typedef MapTy::iterator ptr_iterator; typedef MapTy::const_iterator ptr_const_iterator; @@ -745,13 +747,15 @@ namespace { /// Returns true if overflow occured. Returns false if overflow did not /// occur. bool GetAllPathCountWithOverflow(unsigned &PathCount) const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); + if (TopDownPathCount == OverflowOccurredValue || + BottomUpPathCount == OverflowOccurredValue) + return true; unsigned long long Product = (unsigned long long)TopDownPathCount*BottomUpPathCount; - PathCount = Product; - // Overflow occured if any of the upper bits of Product are set. - return Product >> 32; + // Overflow occured if any of the upper bits of Product are set or if all + // the lower bits of Product are all set. + return (Product >> 32) || + ((PathCount = Product) == OverflowOccurredValue); } // Specialized CFG utilities. @@ -766,6 +770,8 @@ namespace { bool isExit() const { return Succs.empty(); } }; + + const unsigned BBState::OverflowOccurredValue = 0xffffffff; } void BBState::InitFromPred(const BBState &Other) { @@ -781,13 +787,25 @@ void BBState::InitFromSucc(const BBState &Other) { /// The top-down traversal uses this to merge information about predecessors to /// form the initial state for a new block. 
void BBState::MergePred(const BBState &Other) { + if (TopDownPathCount == OverflowOccurredValue) + return; + // Other.TopDownPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. TopDownPathCount += Other.TopDownPathCount; + // In order to be consistent, we clear the top down pointers when by adding + // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow + // has not occured. + if (TopDownPathCount == OverflowOccurredValue) { + clearTopDownPointers(); + return; + } + // Check for overflow. If we have overflow, fall back to conservative // behavior. if (TopDownPathCount < Other.TopDownPathCount) { + TopDownPathCount = OverflowOccurredValue; clearTopDownPointers(); return; } @@ -813,13 +831,25 @@ void BBState::MergePred(const BBState &Other) { /// The bottom-up traversal uses this to merge information about successors to /// form the initial state for a new block. void BBState::MergeSucc(const BBState &Other) { + if (BottomUpPathCount == OverflowOccurredValue) + return; + // Other.BottomUpPathCount can be 0, in which case it is either dead or a // loop backedge. Loop backedges are special. BottomUpPathCount += Other.BottomUpPathCount; + // In order to be consistent, we clear the top down pointers when by adding + // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow + // has not occured. + if (BottomUpPathCount == OverflowOccurredValue) { + clearBottomUpPointers(); + return; + } + // Check for overflow. If we have overflow, fall back to conservative // behavior. if (BottomUpPathCount < Other.BottomUpPathCount) { + BottomUpPathCount = OverflowOccurredValue; clearBottomUpPointers(); return; } @@ -2526,9 +2556,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap // If we overflow when we compute the path count, don't remove/move // anything. const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()]; - unsigned PathCount; + unsigned PathCount = BBState::OverflowOccurredValue; if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) return false; + assert(PathCount != BBState::OverflowOccurredValue && + "PathCount at this point can not be " + "OverflowOccurredValue."); OldDelta -= PathCount; // Merge the ReleaseMetadata and IsTailCallRelease values. @@ -2558,8 +2591,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap // If we overflow when we compute the path count, don't // remove/move anything. const BBState &RIPBBState = BBStates[RIP->getParent()]; + PathCount = BBState::OverflowOccurredValue; if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) return false; + assert(PathCount != BBState::OverflowOccurredValue && + "PathCount at this point can not be " + "OverflowOccurredValue."); NewDelta -= PathCount; } } @@ -2595,9 +2632,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap // If we overflow when we compute the path count, don't remove/move // anything. const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()]; - unsigned PathCount; + unsigned PathCount = BBState::OverflowOccurredValue; if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) return false; + assert(PathCount != BBState::OverflowOccurredValue && + "PathCount at this point can not be " + "OverflowOccurredValue."); OldDelta += PathCount; OldCount += PathCount; @@ -2612,8 +2652,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap // If we overflow when we compute the path count, don't // remove/move anything. 
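// Aside: the saturating-counter scheme this commit introduces, reduced to a
// self-contained sketch. The sentinel mirrors BBState::OverflowOccurredValue;
// the free-function names are illustrative, not the real BBState interface.
static const unsigned OverflowSentinel = 0xffffffffu;

// Saturating add: once a path count has overflowed it stays overflowed, so
// later merges cannot accidentally "heal" it.
unsigned mergePathCounts(unsigned A, unsigned B) {
  if (A == OverflowSentinel || B == OverflowSentinel)
    return OverflowSentinel;
  unsigned Sum = A + B;
  // Saturate on unsigned wraparound, and also when the sum lands exactly on
  // the sentinel (the "consistency" case the comments above describe).
  if (Sum < A || Sum == OverflowSentinel)
    return OverflowSentinel;
  return Sum;
}

// Mirrors GetAllPathCountWithOverflow: true means "overflowed, do nothing".
bool getAllPathCount(unsigned TopDown, unsigned BottomUp, unsigned &PathCount) {
  if (TopDown == OverflowSentinel || BottomUp == OverflowSentinel)
    return true;
  unsigned long long Product = (unsigned long long)TopDown * BottomUp;
  PathCount = (unsigned)Product;
  // Overflowed if the product needs more than 32 bits, or if its low 32 bits
  // collide with the sentinel value itself (conservatively treated the same).
  return (Product >> 32) != 0 || PathCount == OverflowSentinel;
}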
const BBState &RIPBBState = BBStates[RIP->getParent()]; + + PathCount = BBState::OverflowOccurredValue; + if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) + return false; + assert(PathCount != BBState::OverflowOccurredValue && + "PathCount at this point can not be " + "OverflowOccurredValue."); + NewDelta += PathCount; + NewCount += PathCount; } -- cgit v1.1 From 5cf14916c3b1dd1df32c825b8eb63b6d828aa7a5 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 10 Aug 2013 20:16:06 +0000 Subject: Revert r188119 "Kill some duplicated code for removing unreachable BBs." MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is breaking buildbots with libgmalloc enabled on Mac OS X. $ cd llvm ; mkdir release ; cd release $ ../configure --enable-optimized --prefix=$PWD/install $ make $ make check $ Release+Asserts/bin/llvm-lit -v --param use_gmalloc=1 --param \ gmalloc_path=/usr/lib/libgmalloc.dylib \ ../test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188142 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 165 +++++++++++++++++++++++++++++- lib/Transforms/Utils/Local.cpp | 147 +++----------------------- 2 files changed, 172 insertions(+), 140 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 8371f6d..6d05640 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -66,6 +66,161 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination.
+ BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet &Reachable) { + + SmallVector Worklist; + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; + do { + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa(Ptr) || + (isa(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa(Callee) || isa(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); + } while (!Worklist.empty()); + return Changed; +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +static bool removeUnreachableBlocksFromFn(Function &F) { + SmallPtrSet Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + + // If there are unreachable blocks in the CFG... + if (Reachable.size() == F.size()) + return Changed; + + assert(Reachable.size() < F.size()); + NumSimpl += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... 
+ for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + continue; + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (Reachable.count(*SI)) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + for (Function::iterator I = ++F.begin(); I != F.end();) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; + + return true; +} + /// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. static bool mergeEmptyReturnBlocks(Function &F) { @@ -170,7 +325,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis(); const DataLayout *TD = getAnalysisIfAvailable(); - bool EverChanged = removeUnreachableBlocks(F); + bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, TD); @@ -178,16 +333,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { if (!EverChanged) return false; // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, - // removeUnreachableBlocks is needed to nuke them, which means we should + // removeUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to // avoid reruning iterativelySimplifyCFG if the second pass of - // removeUnreachableBlocks doesn't do anything. - if (!removeUnreachableBlocks(F)) + // removeUnreachableBlocksFromFn doesn't do anything. + if (!removeUnreachableBlocksFromFn(F)) return true; do { EverChanged = iterativelySimplifyCFG(F, TTI, TD); - EverChanged |= removeUnreachableBlocks(F); + EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 4db3a72..08e1808 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -44,8 +43,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); - //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -1124,153 +1121,33 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, return true; } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. -static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { - BasicBlock *BB = I->getParent(); - // Loop over all of the successors, removing BB's entry from any PHI - // nodes. - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - (*SI)->removePredecessor(BB); - - // Insert a call to llvm.trap right before this. This turns the undefined - // behavior into a hard fail instead of falling through into random code. 
- if (UseLLVMTrap) { - Function *TrapFn = - Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst *CallTrap = CallInst::Create(TrapFn, "", I); - CallTrap->setDebugLoc(I->getDebugLoc()); - } - new UnreachableInst(I->getContext(), I); - - // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); - while (BBI != BBE) { - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); - BB->getInstList().erase(BBI++); - } -} - -/// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Follow the call by a branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); - - // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); -} - -static bool markAliveBlocks(BasicBlock *BB, - SmallPtrSet &Reachable) { - +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet Reachable; SmallVector Worklist; - Worklist.push_back(BB); - Reachable.insert(BB); - bool Changed = false; + Worklist.push_back(&F.getEntryBlock()); + Reachable.insert(&F.getEntryBlock()); do { - BB = Worklist.pop_back_val(); - - // Do a quick scan of the basic block, turning any obviously unreachable - // instructions into LLVM unreachable insts. The instruction combining pass - // canonicalizes unreachable insts into stores to null or undef. - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ - if (CallInst *CI = dyn_cast(BBI)) { - if (CI->doesNotReturn()) { - // If we found a call to a no-return function, insert an unreachable - // instruction after it. Make sure there isn't *already* one there - // though. - ++BBI; - if (!isa(BBI)) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); - Changed = true; - } - break; - } - } - - // Store to undef and store to null are undefined and used to signal that - // they should be changed to unreachable by passes that can't modify the - // CFG. - if (StoreInst *SI = dyn_cast(BBI)) { - // Don't touch volatile stores. - if (SI->isVolatile()) continue; - - Value *Ptr = SI->getOperand(1); - - if (isa(Ptr) || - (isa(Ptr) && - SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); - Changed = true; - break; - } - } - } - - // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - Value *Callee = II->getCalledValue(); - if (isa(Callee) || isa(Callee)) { - changeToUnreachable(II, true); - Changed = true; - } else if (II->doesNotThrow()) { - if (II->use_empty() && II->onlyReadsMemory()) { - // jump to the normal destination branch. 
- BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); - } else - changeToCall(II); - Changed = true; - } - } - - Changed |= ConstantFoldTerminator(BB, true); + BasicBlock *BB = Worklist.pop_back_val(); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.insert(*SI)) Worklist.push_back(*SI); } while (!Worklist.empty()); - return Changed; -} - -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even -/// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. -bool llvm::removeUnreachableBlocks(Function &F) { - SmallPtrSet Reachable; - bool Changed = markAliveBlocks(F.begin(), Reachable); - // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) - return Changed; + return false; assert(Reachable.size() < F.size()); - NumRemoved += F.size()-Reachable.size(); - - // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... - for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { + if (Reachable.count(I)) continue; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); - BB->dropAllReferences(); + (*SI)->removePredecessor(I); + I->dropAllReferences(); } - for (Function::iterator I = ++F.begin(); I != F.end();) + for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else -- cgit v1.1 From 3c7a446059133de68c912242cb3b0cc934b8e6b1 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 11 Aug 2013 01:03:18 +0000 Subject: Reformat some bits of AllocaPromoter and simplify the name and type of our visiting datastructures in the AllocaPromoter/SSAUpdater path of SROA. Also shift the order of clears around to be more consistent. No functionality changed here, this is just a cleanup.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 5c55143..2793825 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -733,9 +733,9 @@ class AllocaPromoter : public LoadAndStorePromoter { SmallVector DVIs; public: - AllocaPromoter(const SmallVectorImpl &Insts, SSAUpdater &S, + AllocaPromoter(const SmallVectorImpl &Insts, SSAUpdater &S, AllocaInst &AI, DIBuilder &DIB) - : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} + : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} void run(const SmallVectorImpl &Insts) { // Retain the debug information attached to the alloca for use when @@ -3364,12 +3364,12 @@ void SROA::deleteDeadInstructions(SmallPtrSet &DeletedAllocas) { } static void enqueueUsersInWorklist(Instruction &I, - SmallVectorImpl &UseWorklist, - SmallPtrSet &VisitedUses) { + SmallVectorImpl &Worklist, + SmallPtrSet &Visited) { for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ++UI) - if (VisitedUses.insert(&UI.getUse())) - UseWorklist.push_back(&UI.getUse()); + if (Visited.insert(cast(*UI))) + Worklist.push_back(cast(*UI)); } /// \brief Promote the allocas, using the best available technique. @@ -3396,29 +3396,29 @@ bool SROA::promoteAllocas(Function &F) { DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n"); SSAUpdater SSA; DIBuilder DIB(*F.getParent()); - SmallVector Insts; + SmallVector Insts; // We need a worklist to walk the uses of each alloca. - SmallVector UseWorklist; - SmallPtrSet VisitedUses; + SmallVector Worklist; + SmallPtrSet Visited; SmallVector DeadInsts; for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { AllocaInst *AI = PromotableAllocas[Idx]; - UseWorklist.clear(); - VisitedUses.clear(); + Insts.clear(); + Worklist.clear(); + Visited.clear(); - enqueueUsersInWorklist(*AI, UseWorklist, VisitedUses); + enqueueUsersInWorklist(*AI, Worklist, Visited); - while (!UseWorklist.empty()) { - Use *U = UseWorklist.pop_back_val(); - Instruction &I = *cast(U->getUser()); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); // FIXME: Currently the SSAUpdater infrastructure doesn't reason about // lifetime intrinsics and so we strip them (and the bitcasts+GEPs // leading to them) here. Eventually it should use them to optimize the // scalar values produced. - if (IntrinsicInst *II = dyn_cast(&I)) { + if (IntrinsicInst *II = dyn_cast(I)) { assert(II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end); II->eraseFromParent(); @@ -3428,12 +3428,12 @@ bool SROA::promoteAllocas(Function &F) { // Push the loads and stores we find onto the list. SROA will already // have validated that all loads and stores are viable candidates for // promotion. 
- if (LoadInst *LI = dyn_cast(&I)) { + if (LoadInst *LI = dyn_cast(I)) { assert(LI->getType() == AI->getAllocatedType()); Insts.push_back(LI); continue; } - if (StoreInst *SI = dyn_cast(&I)) { + if (StoreInst *SI = dyn_cast(I)) { assert(SI->getValueOperand()->getType() == AI->getAllocatedType()); Insts.push_back(SI); continue; @@ -3442,11 +3442,10 @@ bool SROA::promoteAllocas(Function &F) { // For everything else, we know that only no-op bitcasts and GEPs will // make it this far, just recurse through them and recall them for later // removal. - DeadInsts.push_back(&I); - enqueueUsersInWorklist(I, UseWorklist, VisitedUses); + DeadInsts.push_back(I); + enqueueUsersInWorklist(*I, Worklist, Visited); } AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); - Insts.clear(); while (!DeadInsts.empty()) DeadInsts.pop_back_val()->eraseFromParent(); AI->eraseFromParent(); -- cgit v1.1 From 37508bb842d9beedd75139a589c6f538f90efbaa Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 11 Aug 2013 01:56:15 +0000 Subject: Finish fixing the SSAUpdater-based AllocaPromoter strategy in SROA to cope with the more general set of patterns that are now handled by mem2reg and that we can detect quickly while doing SROA's initial analysis. Notably, this allows it to promote through no-op bitcast and GEP sequences. A core part of the SSAUpdater approach is the ability to test whether a particular instruction is part of the set being promoted. Testing this becomes significantly more complex in the world where the operand to every load and store isn't the alloca itself. I ended up using the approach of walking up the def-chain until we find the alloca. I benchmarked this against keeping a set of pointer operands and keeping a set of the loads and stores we care about, and this one seemed faster although the difference was very small. No test case yet because currently the rewriting always "fixes" the inputs to not require this. The next patch which re-enables early promotion of easy cases in SROA will include a test case that specifically exercises this aspect of the alloca promoter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188145 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 2793825..d35c3b5 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -762,9 +762,30 @@ public: virtual bool isInstInList(Instruction *I, const SmallVectorImpl &Insts) const { + Value *Ptr; if (LoadInst *LI = dyn_cast(I)) - return LI->getOperand(0) == &AI; - return cast(I)->getPointerOperand() == &AI; + Ptr = LI->getOperand(0); + else + Ptr = cast(I)->getPointerOperand(); + + // Only used to detect cycles, which will be rare and quickly found as + // we're walking up a chain of defs rather than down through uses. 
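// Aside: the "walk up the def-chain" test described in the commit message,
// distilled into a free function with the template arguments written out. A
// sketch against the 2013-era API, where SmallPtrSet::insert returned bool;
// the real code lives in AllocaPromoter::isInstInList, just below.
static bool chainReachesAlloca(Value *Ptr, AllocaInst &AI) {
  SmallPtrSet<Value *, 4> Visited; // cycle detection only
  do {
    if (Ptr == &AI)
      return true;
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
      Ptr = BCI->getOperand(0); // look through no-op casts
    else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
      Ptr = GEPI->getPointerOperand(); // and through GEPs
    else
      return false; // some other def: cannot be the alloca
  } while (Visited.insert(Ptr)); // stop if we ever revisit a value
  return false;
}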
+ SmallPtrSet Visited; + + do { + if (Ptr == &AI) + return true; + + if (BitCastInst *BCI = dyn_cast(Ptr)) + Ptr = BCI->getOperand(0); + else if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) + Ptr = GEPI->getPointerOperand(); + else + return false; + + } while (Visited.insert(Ptr)); + + return false; } virtual void updateDebugInfo(Instruction *Inst) const { -- cgit v1.1 From 5b854f1ea55601790d9191c9720e77da35095340 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 11 Aug 2013 02:17:11 +0000 Subject: Re-instate r187323 which fast-tracks promotable allocas as soon as the SROA-based analysis has enough information. This should work now that both mem2reg *and* the SSAUpdater-based AllocaPromoter have been updated to be able to promote the types of allocas that the SROA analysis detects. I've included tests for the AllocaPromoter that were only possible to write once we fast-tracked promotable allocas without rewriting them. This includes a test both for r187347 and r188145. Original commit log for r187323: """ Now that mem2reg understands how to cope with a slightly wider set of uses of an alloca, we can pre-compute promotability while analyzing an alloca for splitting in SROA. That lets us short-circuit the common case of a bunch of trivially promotable allocas. This cuts 20% to 30% off the run time of SROA for typical frontend-generated IR sequences I'm seeing. It gets the new SROA to within 20% of ScalarRepl for such code. My current benchmark for these numbers is PR15412, but it fits the general pattern of IR emitted by Clang so it should be widely applicable. """ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188146 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 93 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 12 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d35c3b5..be3ef6f 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -197,6 +197,18 @@ public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); + /// \brief Whether we determined during the trivial analysis of the alloca + /// that it was immediately promotable with mem2reg. + bool isAllocaPromotable() const { return IsAllocaPromotable; } + + /// \brief A list of directly stored values when \c isAllocaPromotable is + /// true. + /// + /// The contents are undefined if the alloca is not trivially promotable. + /// This is used to detect other allocas which should be iterated on when + /// doing direct promotion. + ArrayRef getStoredValues() const { return StoredValues; } + /// \brief Test whether a pointer to the allocation escapes our analysis. /// /// If this is true, the slices are never fully built and should be @@ -253,10 +265,20 @@ private: class SliceBuilder; friend class AllocaSlices::SliceBuilder; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; -#endif + + /// \brief A flag indicating if the alloca is trivially promotable. + /// + /// While walking the alloca's uses we track when the uses exceed what + /// mem2reg can trivially handle. This essentially should match the logic in + /// \c isAllocaPromotable but re-using the existing walk of the pointer uses. + bool IsAllocaPromotable; + + /// \brief Storage for stored values. + /// + /// Only used while the alloca is trivially promotable.
+ SmallVector StoredValues; /// \brief The instruction responsible for this alloca not having a known set /// of slices. @@ -325,9 +347,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { SmallPtrSet VisitedDeadInsts; public: - SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S) + SliceBuilder(const DataLayout &DL, AllocaSlices &S) : PtrUseVisitor(DL), - AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {} + AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {} private: void markAsDead(Instruction &I) { @@ -380,6 +402,15 @@ private: if (GEPI.use_empty()) return markAsDead(GEPI); + // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead + // the offsets of the loads. Until then, we short-circuit here for the + // promotable case. + if (GEPI.hasAllZeroIndices()) + return Base::enqueueUsers(GEPI); + + // Otherwise, there is something in the GEP, so we disable mem2reg and + // accumulate it. + S.IsAllocaPromotable = false; return Base::visitGetElementPtrInst(GEPI); } @@ -396,6 +427,13 @@ private: bool IsSplittable = Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; + // mem2reg can only promote non-volatile loads and stores which exactly + // load the alloca (no offset and the right type). + if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType()) + S.IsAllocaPromotable = false; + if (S.IsAllocaPromotable) + assert(Offset == 0); + insertUse(I, Offset, Size, IsSplittable); } @@ -436,6 +474,9 @@ private: return markAsDead(SI); } + if (S.IsAllocaPromotable) + S.StoredValues.push_back(ValOp); + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); @@ -453,6 +494,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), @@ -469,6 +512,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); + S.IsAllocaPromotable = false; + uint64_t RawOffset = Offset.getLimitedValue(); uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; @@ -529,6 +574,8 @@ private: return; } + S.IsAllocaPromotable = false; + Base::visitIntrinsicInst(II); } @@ -603,6 +650,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(PN, Offset, PHISize); } @@ -610,14 +659,18 @@ private: if (SI.use_empty()) return markAsDead(SI); if (Value *Result = foldSelectInst(SI)) { - if (Result == *U) + if (Result == *U) { // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. enqueueUsers(SI); - else + + // FIXME: mem2reg should support this pattern, but it doesn't. + S.IsAllocaPromotable = false; + } else { // Otherwise the operand to the select is dead, and we can replace it // with undef. S.DeadOperands.push_back(U); + } return; } @@ -644,6 +697,8 @@ private: return; } + S.IsAllocaPromotable = false; + insertUse(SI, Offset, SelectSize); } @@ -654,12 +709,8 @@ private: }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) - : -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - AI(AI), -#endif - PointerEscapingInstr(0) { - SliceBuilder PB(DL, AI, *this); + : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) { + SliceBuilder PB(DL, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. 
abort info into the caller nicely, @@ -3339,6 +3390,24 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (S.begin() == S.end()) return Changed; + // Trivially promotable, don't go through the splitting and rewriting. + if (S.isAllocaPromotable()) { + DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n"); + PromotableAllocas.push_back(&AI); + + // Walk through the stored values quickly here to handle directly + // promotable allocas that require iterating on other allocas. + ArrayRef StoredValues = S.getStoredValues(); + for (ArrayRef::iterator SVI = StoredValues.begin(), + SVE = StoredValues.end(); + SVI != SVE; ++SVI) + if ((*SVI)->getType()->isPointerTy()) + if (AllocaInst *SAI = + dyn_cast((*SVI)->stripInBoundsOffsets())) + PostPromotionWorklist.insert(SAI); + return true; + } + Changed |= splitAlloca(AI, S); DEBUG(dbgs() << " Speculating PHIs\n"); -- cgit v1.1 From 23331c30aefae840f55b52e2ed343117e5599682 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 12 Aug 2013 07:26:09 +0000 Subject: Fix big-endian handling of integer-to-vector bitcasts in InstCombine These functions used to assume that the lsb of an integer corresponds to vector element 0, whereas for big-endian it's the other way around: the msb is in the first element and the lsb is in the last element. Fixes MultiSource/Benchmarks/mediabench/gsm/toast for z. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188155 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 52 +++++++++++++++---------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 361acdd..e2f64d7 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1488,12 +1488,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// insertions into the vector. See the example in the comment for /// OptimizeIntegerToVectorInsertions for the pattern this handles. /// The type of V is always a non-zero multiple of VecEltTy's size. +/// Shift is the number of bits between the lsb of V and the lsb of +/// the vector. /// /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. -static bool CollectInsertionElements(Value *V, unsigned ElementIndex, +static bool CollectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl &Elements, - Type *VecEltTy) { + Type *VecEltTy, InstCombiner &IC) { + assert(isMultipleOfTypeSize(Shift, VecEltTy) && + "Shift should be a multiple of the element type size"); + // Undef values never contribute useful bits to the result. if (isa(V)) return true; @@ -1505,8 +1510,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (C->isNullValue()) return true; + unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy); + if (IC.getDataLayout()->isBigEndian()) + ElementIndex = Elements.size() - ElementIndex - 1; + // Fail if multiple elements are inserted into this slot. - if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) + if (Elements[ElementIndex] != 0) return false; Elements[ElementIndex] = V; @@ -1522,7 +1531,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, // it to the right type so it gets properly inserted. 
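// Aside: a worked example of the endian fix above. For an i32 assembled from
// four i8 fragments and bitcast to <4 x i8>, the fragment shifted left by
// Shift bits lands in element Shift/8 on a little-endian target, but in
// element 3 - Shift/8 on a big-endian target, because the integer's msb is
// the first vector element there. Sketch only; the real code goes through
// getTypeSizeIndex and DataLayout::isBigEndian.
unsigned elementIndexForShift(unsigned Shift, unsigned ElementBits,
                              unsigned NumElements, bool IsBigEndian) {
  unsigned Index = Shift / ElementBits;  // little-endian element index
  if (IsBigEndian)
    Index = NumElements - Index - 1;     // mirror the index for big-endian
  return Index;
}
// e.g. Shift=24, ElementBits=8, NumElements=4: element 3 on LE, element 0 on BE.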
if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), - ElementIndex, Elements, VecEltTy); + Shift, Elements, VecEltTy, IC); // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. @@ -1533,10 +1542,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); for (unsigned i = 0; i != NumElts; ++i) { + unsigned ShiftI = Shift+i*ElementSize; Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), - i*ElementSize)); + ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy)) + if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC)) return false; } return true; @@ -1549,29 +1559,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy) && - CollectInsertionElements(I->getOperand(1), ElementIndex, - Elements, VecEltTy); + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC) && + CollectInsertionElements(I->getOperand(1), Shift, + Elements, VecEltTy, IC); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast(I->getOperand(1)); if (CI == 0) return false; - if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; - unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - - return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, - Elements, VecEltTy); + Shift += CI->getZExtValue(); + if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; + return CollectInsertionElements(I->getOperand(0), Shift, + Elements, VecEltTy, IC); } } @@ -1594,12 +1603,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, /// Into two insertelements that do "buildvector{%inc, %inc5}". static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { + // We need to know the target byte order to perform this optimization. 
+ if (!IC.getDataLayout()) return 0; + VectorType *DestVecTy = cast(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, - DestVecTy->getElementType())) + DestVecTy->getElementType(), IC)) return 0; // If we succeeded, we know that all of the element are specified by Elements -- cgit v1.1 From d976d43f23d67e18f097d72fd90923627f334c79 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Mon, 12 Aug 2013 07:49:36 +0000 Subject: Introduce factory methods for SpecialCaseList Summary: Doing work in constructors is bad: this change suggests to call SpecialCaseList::create(Path, Error) instead of "new SpecialCaseList(Path)". Currently the latter may crash with report_fatal_error, which is undesirable - sometimes we want to report the error to user gracefully - for example, if he provides an incorrect file as an argument of Clang's -fsanitize-blacklist flag. Reviewers: pcc Reviewed By: pcc CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1327 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188156 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SpecialCaseList.cpp | 53 ++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index b98cb5b..5a3b192 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -49,29 +49,58 @@ struct SpecialCaseList::Entry { } }; +SpecialCaseList::SpecialCaseList() : Entries() {} + SpecialCaseList::SpecialCaseList(const StringRef Path) { // Validate and open blacklist file. if (Path.empty()) return; OwningPtr File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { - report_fatal_error("Can't open blacklist file: " + Path + ": " + + report_fatal_error("Can't open file '" + Path + "': " + EC.message()); } - init(File.get()); + std::string Error; + if (!parse(File.get(), Error)) + report_fatal_error(Error); } SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) { - init(MB); + std::string Error; + if (!parse(MB, Error)) + report_fatal_error(Error); } -void SpecialCaseList::init(const MemoryBuffer *MB) { +SpecialCaseList *SpecialCaseList::create( + const StringRef Path, std::string &Error) { + if (Path.empty()) + return new SpecialCaseList(); + OwningPtr File; + if (error_code EC = MemoryBuffer::getFile(Path, File)) { + Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); + return 0; + } + return create(File.get(), Error); +} + +SpecialCaseList *SpecialCaseList::create( + const MemoryBuffer *MB, std::string &Error) { + OwningPtr SCL(new SpecialCaseList()); + if (!SCL->parse(MB, Error)) + return 0; + return SCL.take(); +} + +bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { // Iterate through each line in the blacklist file. 
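// Aside: the factory shape this change introduces, reduced to its essentials.
// Parsing moves out of the constructor into a fallible parse(), create()
// reports failure through an out-parameter instead of report_fatal_error, and
// a createOrDie() wrapper (added in r188169 below) keeps the old die-on-error
// behaviour for callers that want it. An illustrative sketch leaning on
// LLVM's OwningPtr/MemoryBuffer/report_fatal_error, not the real class.
class Parsed {
  Parsed() {} // cheap, cannot fail
  bool parse(const MemoryBuffer *MB, std::string &Error) {
    if (!MB) { Error = "no input buffer"; return false; }
    return true; // real parsing elided in this sketch
  }
public:
  static Parsed *create(const MemoryBuffer *MB, std::string &Error) {
    OwningPtr<Parsed> P(new Parsed());
    if (!P->parse(MB, Error))
      return 0; // caller inspects Error and recovers gracefully
    return P.take();
  }
  static Parsed *createOrDie(const MemoryBuffer *MB) {
    std::string Error;
    if (Parsed *P = create(MB, Error))
      return P;
    report_fatal_error(Error);
  }
};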
SmallVector Lines; SplitString(MB->getBuffer(), Lines, "\n\r"); StringMap > Regexps; + assert(Entries.empty() && + "parse() should be called on an empty SpecialCaseList"); + int LineNo = 1; for (SmallVectorImpl::iterator I = Lines.begin(), E = Lines.end(); - I != E; ++I) { + I != E; ++I, ++LineNo) { // Ignore empty lines and lines starting with "#" if (I->empty() || I->startswith("#")) continue; @@ -80,7 +109,9 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { StringRef Prefix = SplitLine.first; if (SplitLine.second.empty()) { // Missing ':' in the line. - report_fatal_error("malformed blacklist line: " + SplitLine.first); + Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + + SplitLine.first + "'").str(); + return false; } std::pair SplitRegexp = SplitLine.second.split("="); @@ -113,10 +144,11 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { // Check that the regexp is valid. Regex CheckRE(Regexp); - std::string Error; - if (!CheckRE.isValid(Error)) { - report_fatal_error("malformed blacklist regex: " + SplitLine.second + - ": " + Error); + std::string REError; + if (!CheckRE.isValid(REError)) { + Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + + SplitLine.second + "': " + REError).str(); + return false; } // Add this regexp into the proper group by its prefix. @@ -135,6 +167,7 @@ void SpecialCaseList::init(const MemoryBuffer *MB) { Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); } } + return true; } SpecialCaseList::~SpecialCaseList() { -- cgit v1.1 From e39e1316f034e9932cb8da535541a3e35a0e490a Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Mon, 12 Aug 2013 11:46:09 +0000 Subject: Add SpecialCaseList::createOrDie() factory and use it in sanitizer passes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4 ++-- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 6 +++--- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2 +- lib/Transforms/Utils/SpecialCaseList.cpp | 7 +++++++ 5 files changed, 14 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7556522..2ee3e01 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -883,7 +883,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { TD = getAnalysisIfAvailable(); if (!TD) return false; - BL.reset(new SpecialCaseList(BlacklistFile)); + BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); if (BL->isIn(M)) return false; C = &(M.getContext()); int LongSize = TD->getPointerSizeInBits(); @@ -1076,7 +1076,7 @@ bool AddressSanitizer::doInitialization(Module &M) { if (!TD) return false; - BL.reset(new SpecialCaseList(BlacklistFile)); + BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); DynamicallyInitializedGlobals.Init(M); C = &(M.getContext()); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 0bbbfef..f5531e0 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -129,7 +129,7 @@ class DataFlowSanitizer : public ModulePass { Constant *DFSanUnionFn; Constant *DFSanUnionLoadFn; MDNode *ColdCallWeights; - SpecialCaseList Greylist; + OwningPtr 
Greylist; DenseMap UnwrappedFnMap; Value *getShadowAddress(Value *Addr, Instruction *Pos); @@ -211,7 +211,7 @@ ModulePass *llvm::createDataFlowSanitizerPass(void *(*getArgTLS)(), DataFlowSanitizer::DataFlowSanitizer(void *(*getArgTLS)(), void *(*getRetValTLS)()) : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), - Greylist(ClGreylistFile) {} + Greylist(SpecialCaseList::createOrDie(ClGreylistFile)) {} FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { llvm::SmallVector ArgTypes; @@ -269,7 +269,7 @@ bool DataFlowSanitizer::doInitialization(Module &M) { DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI(Function *F) { - if (Greylist.isIn(*F)) + if (Greylist->isIn(*F)) return IA_MemOnly; else return getDefaultInstrumentedABI(); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 0251f16..a78213d 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -338,7 +338,7 @@ bool MemorySanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable(); if (!TD) return false; - BL.reset(new SpecialCaseList(BlacklistFile)); + BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); C = &(M.getContext()); unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0); switch (PtrSize) { diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index cc971a3..e19ceba 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -227,7 +227,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable(); if (!TD) return false; - BL.reset(new SpecialCaseList(BlacklistFile)); + BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index 5a3b192..5ddaaba 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -91,6 +91,13 @@ SpecialCaseList *SpecialCaseList::create( return SCL.take(); } +SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { + std::string Error; + if (SpecialCaseList *SCL = create(Path, Error)) + return SCL; + report_fatal_error(Error); +} + bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { // Iterate through each line in the blacklist file. SmallVector Lines; -- cgit v1.1 From 655abf57edcc9954428ac405905005f82091add5 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Mon, 12 Aug 2013 11:50:44 +0000 Subject: Remove unused SpecialCaseList constructors git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188171 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SpecialCaseList.cpp | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index 5ddaaba..5400bcd 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -51,26 +51,6 @@ struct SpecialCaseList::Entry { SpecialCaseList::SpecialCaseList() : Entries() {} -SpecialCaseList::SpecialCaseList(const StringRef Path) { - // Validate and open blacklist file. 
- if (Path.empty()) return; - OwningPtr File; - if (error_code EC = MemoryBuffer::getFile(Path, File)) { - report_fatal_error("Can't open file '" + Path + "': " + - EC.message()); - } - - std::string Error; - if (!parse(File.get(), Error)) - report_fatal_error(Error); -} - -SpecialCaseList::SpecialCaseList(const MemoryBuffer *MB) { - std::string Error; - if (!parse(MB, Error)) - report_fatal_error(Error); -} - SpecialCaseList *SpecialCaseList::create( const StringRef Path, std::string &Error) { if (Path.empty()) -- cgit v1.1 From 353476cbbb80881a754a90abc13851a884a4e26c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 12 Aug 2013 17:46:44 +0000 Subject: Fix PR16797 - Support PHINodes with multiple inputs from the same basic block. Do not generate new vector values for the same entries because we know that the incoming values from the same block must be identical. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188185 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9312b4b..22a2519 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1187,10 +1187,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); E->VectorizedValue = NewPhi; + // PHINodes may have multiple entries from the same block. We want to + // visit every block once. + SmallSet VisitedBBs; + for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { ValueList Operands; BasicBlock *IBB = PH->getIncomingBlock(i); + if (VisitedBBs.count(IBB)) { + NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB); + continue; + } + + VisitedBBs.insert(IBB); + // Prepare the operand vector. for (unsigned j = 0; j < E->Scalars.size(); ++j) Operands.push_back(cast(E->Scalars[j])-> -- cgit v1.1 From d275ff5d4c4f4f8bf9a77acd70605caa98c14246 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 12 Aug 2013 20:09:37 +0000 Subject: Move stack protector names to the same place. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188198 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index d56a06f..26743dc 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -114,11 +114,6 @@ bool InternalizePass::runOnModule(Module &M) { CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; bool Changed = false; - // Never internalize functions which code-gen might insert. - // FIXME: We should probably add this (and the __stack_chk_guard) via some - // type of call-back in CodeGen. - ExternalNames.insert("__stack_chk_fail"); - SmallPtrSet Used; collectUsedGlobalVariables(M, Used, false); @@ -166,6 +161,9 @@ bool InternalizePass::runOnModule(Module &M) { ExternalNames.insert("llvm.global.annotations"); // Never internalize symbols code-gen inserts. + // FIXME: We should probably add this (and the __stack_chk_guard) via some + // type of call-back in CodeGen. 
+ ExternalNames.insert("__stack_chk_fail"); ExternalNames.insert("__stack_chk_guard"); // Mark all global variables with initializers that are not in the api as -- cgit v1.1 From a90d91fd1add17b3c6af09a845ede940595098e9 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 12 Aug 2013 22:38:39 +0000 Subject: DataFlowSanitizer: fix a use-after-free. Spotted by libgmalloc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188216 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index f5531e0..af227d2 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -422,9 +422,12 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // instruction's next pointer and moving the next instruction to the // tail block from which we should continue. Instruction *Next = Inst->getNextNode(); + // DFSanVisitor may delete Inst, so keep track of whether it was a + // terminator. + bool IsTerminator = isa(Inst); if (!DFSF.SkipInsts.count(Inst)) DFSanVisitor(DFSF).visit(Inst); - if (isa(Inst)) + if (IsTerminator) break; Inst = Next; } -- cgit v1.1 From 4f96b7e1478be0b33cda589db40635a1e3a40c11 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 12 Aug 2013 22:38:43 +0000 Subject: Reapply r188119 now that the bug it exposed is fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188217 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 165 +----------------------------- lib/Transforms/Utils/Local.cpp | 147 +++++++++++++++++++++++--- 2 files changed, 140 insertions(+), 172 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 6d05640..8371f6d 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -66,161 +66,6 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. -static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { - BasicBlock *BB = I->getParent(); - // Loop over all of the successors, removing BB's entry from any PHI - // nodes. - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - (*SI)->removePredecessor(BB); - - // Insert a call to llvm.trap right before this. This turns the undefined - // behavior into a hard fail instead of falling through into random code. - if (UseLLVMTrap) { - Function *TrapFn = - Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst *CallTrap = CallInst::Create(TrapFn, "", I); - CallTrap->setDebugLoc(I->getDebugLoc()); - } - new UnreachableInst(I->getContext(), I); - - // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); - while (BBI != BBE) { - if (!BBI->use_empty()) - BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); - BB->getInstList().erase(BBI++); - } -} - -/// changeToCall - Convert the specified invoke into a normal call. 
-static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); - NewCall->takeName(II); - NewCall->setCallingConv(II->getCallingConv()); - NewCall->setAttributes(II->getAttributes()); - NewCall->setDebugLoc(II->getDebugLoc()); - II->replaceAllUsesWith(NewCall); - - // Follow the call by a branch to the normal destination. - BranchInst::Create(II->getNormalDest(), II); - - // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); -} - -static bool markAliveBlocks(BasicBlock *BB, - SmallPtrSet &Reachable) { - - SmallVector Worklist; - Worklist.push_back(BB); - Reachable.insert(BB); - bool Changed = false; - do { - BB = Worklist.pop_back_val(); - - // Do a quick scan of the basic block, turning any obviously unreachable - // instructions into LLVM unreachable insts. The instruction combining pass - // canonicalizes unreachable insts into stores to null or undef. - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ - if (CallInst *CI = dyn_cast(BBI)) { - if (CI->doesNotReturn()) { - // If we found a call to a no-return function, insert an unreachable - // instruction after it. Make sure there isn't *already* one there - // though. - ++BBI; - if (!isa(BBI)) { - // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); - Changed = true; - } - break; - } - } - - // Store to undef and store to null are undefined and used to signal that - // they should be changed to unreachable by passes that can't modify the - // CFG. - if (StoreInst *SI = dyn_cast(BBI)) { - // Don't touch volatile stores. - if (SI->isVolatile()) continue; - - Value *Ptr = SI->getOperand(1); - - if (isa(Ptr) || - (isa(Ptr) && - SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); - Changed = true; - break; - } - } - } - - // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - Value *Callee = II->getCalledValue(); - if (isa(Callee) || isa(Callee)) { - changeToUnreachable(II, true); - Changed = true; - } else if (II->doesNotThrow()) { - if (II->use_empty() && II->onlyReadsMemory()) { - // jump to the normal destination branch. - BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); - II->eraseFromParent(); - } else - changeToCall(II); - Changed = true; - } - } - - Changed |= ConstantFoldTerminator(BB, true); - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (Reachable.insert(*SI)) - Worklist.push_back(*SI); - } while (!Worklist.empty()); - return Changed; -} - -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even -/// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. -static bool removeUnreachableBlocksFromFn(Function &F) { - SmallPtrSet Reachable; - bool Changed = markAliveBlocks(F.begin(), Reachable); - - // If there are unreachable blocks in the CFG... - if (Reachable.size() == F.size()) - return Changed; - - assert(Reachable.size() < F.size()); - NumSimpl += F.size()-Reachable.size(); - - // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... 
- for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) - continue; - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); - BB->dropAllReferences(); - } - - for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(I)) - I = F.getBasicBlockList().erase(I); - else - ++I; - - return true; -} - /// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. static bool mergeEmptyReturnBlocks(Function &F) { @@ -325,7 +170,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis(); const DataLayout *TD = getAnalysisIfAvailable(); - bool EverChanged = removeUnreachableBlocksFromFn(F); + bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, TD); @@ -333,16 +178,16 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { if (!EverChanged) return false; // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, - // removeUnreachableBlocksFromFn is needed to nuke them, which means we should + // removeUnreachableBlocks is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to // avoid reruning iterativelySimplifyCFG if the second pass of - // removeUnreachableBlocksFromFn doesn't do anything. - if (!removeUnreachableBlocksFromFn(F)) + // removeUnreachableBlocks doesn't do anything. + if (!removeUnreachableBlocks(F)) return true; do { EverChanged = iterativelySimplifyCFG(F, TTI, TD); - EverChanged |= removeUnreachableBlocksFromFn(F); + EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 08e1808..4db3a72 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -43,6 +44,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); + //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -1121,33 +1124,153 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, return true; } -bool llvm::removeUnreachableBlocks(Function &F) { - SmallPtrSet Reachable; +/// changeToUnreachable - Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { + BasicBlock *BB = I->getParent(); + // Loop over all of the successors, removing BB's entry from any PHI + // nodes. + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + (*SI)->removePredecessor(BB); + + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. 
+ if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); + } + new UnreachableInst(I->getContext(), I); + + // All instructions after this are dead. + BasicBlock::iterator BBI = I, BBE = BB->end(); + while (BBI != BBE) { + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); + BB->getInstList().erase(BBI++); + } +} + +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { + SmallVector Args(II->op_begin(), II->op_end() - 3); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + NewCall->takeName(II); + NewCall->setCallingConv(II->getCallingConv()); + NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); + II->replaceAllUsesWith(NewCall); + + // Follow the call by a branch to the normal destination. + BranchInst::Create(II->getNormalDest(), II); + + // Update PHI nodes in the unwind destination + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); +} + +static bool markAliveBlocks(BasicBlock *BB, + SmallPtrSet &Reachable) { + SmallVector Worklist; - Worklist.push_back(&F.getEntryBlock()); - Reachable.insert(&F.getEntryBlock()); + Worklist.push_back(BB); + Reachable.insert(BB); + bool Changed = false; do { - BasicBlock *BB = Worklist.pop_back_val(); + BB = Worklist.pop_back_val(); + + // Do a quick scan of the basic block, turning any obviously unreachable + // instructions into LLVM unreachable insts. The instruction combining pass + // canonicalizes unreachable insts into stores to null or undef. + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;++BBI){ + if (CallInst *CI = dyn_cast(BBI)) { + if (CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. + ++BBI; + if (!isa(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } + break; + } + } + + // Store to undef and store to null are undefined and used to signal that + // they should be changed to unreachable by passes that can't modify the + // CFG. + if (StoreInst *SI = dyn_cast(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + + Value *Ptr = SI->getOperand(1); + + if (isa(Ptr) || + (isa(Ptr) && + SI->getPointerAddressSpace() == 0)) { + changeToUnreachable(SI, true); + Changed = true; + break; + } + } + } + + // Turn invokes that call 'nounwind' functions into ordinary calls. + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa(Callee) || isa(Callee)) { + changeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. 
+ BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + changeToCall(II); + Changed = true; + } + } + + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.insert(*SI)) Worklist.push_back(*SI); } while (!Worklist.empty()); + return Changed; +} + +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false +/// otherwise. +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet Reachable; + bool Changed = markAliveBlocks(F.begin(), Reachable); + // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) - return false; + return Changed; assert(Reachable.size() < F.size()); - for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { - if (Reachable.count(I)) + NumRemoved += F.size()-Reachable.size(); + + // Loop over all of the basic blocks that are not reachable, dropping all of + // their internal references... + for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { + if (Reachable.count(BB)) continue; - for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(I); - I->dropAllReferences(); + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); } - for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + for (Function::iterator I = ++F.begin(); I != F.end();) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else -- cgit v1.1 From b6171c529670e5c240aaf9c08f5f1b6dba9d16fc Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 13 Aug 2013 15:51:25 +0000 Subject: Remove logic that decides whether to vectorize or not depending on O-levels I have moved this logic into clang and opt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index a6b3f4e..3883ea2 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -195,7 +195,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 
MPM.add(createLoopDeletionPass()); // Delete dead loops - if (!LateVectorize && LoopVectorize && OptLevel > 1 && SizeLevel < 2) + if (!LateVectorize && LoopVectorize) MPM.add(createLoopVectorizePass()); if (!DisableUnrollLoops) -- cgit v1.1 From ceda68fe2430e0e3c8bec5487d66881f34b275ed Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 13 Aug 2013 16:12:04 +0000 Subject: Also remove logic in LateVectorize git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188285 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 3883ea2..743dc42 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -249,7 +249,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Add the various vectorization passes and relevant cleanup passes for // them since we are no longer in the middle of the main scalar pipeline. - if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) { + if (LoopVectorize) { MPM.add(createLoopVectorizePass()); if (!DisableUnrollLoops) -- cgit v1.1 From a036a31d94ec3984ae94bee302e0714b4c3e5abb Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 13 Aug 2013 16:52:41 +0000 Subject: dfsan: fix lint warnings git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index af227d2..29413d5 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -138,7 +138,7 @@ class DataFlowSanitizer : public ModulePass { InstrumentedABI getInstrumentedABI(Function *F); InstrumentedABI getDefaultInstrumentedABI(); -public: + public: DataFlowSanitizer(void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); static char ID; bool doInitialization(Module &M); @@ -172,7 +172,7 @@ struct DFSanFunction { }; class DFSanVisitor : public InstVisitor { -public: + public: DFSanFunction &DFSF; DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} -- cgit v1.1 From 6c1fa7caaefc88a5a867add402d90115823bd0eb Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 13 Aug 2013 22:51:58 +0000 Subject: Revert r187191, which broke opt -mem2reg on the testcases included in PR16867. However, opt -O2 doesn't run mem2reg directly so nobody noticed until r188146 when SROA started sending more things directly down the PromoteMemToReg path. In order to revert r187191, I also revert dependent revisions r187296, r187322 and r188146. Fixes PR16867. Does not add the testcases from that PR, but both of them should get added for both mem2reg and sroa when this revert gets unreverted. 
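For illustration, here is a minimal sketch (not part of the patch) of the promotion driver that this revert restores; it mirrors the Mem2Reg.cpp hunk further below, with only the wrapper name promoteAllocasIn invented for the example:

    // Sketch only, assuming the post-revert API in which isAllocaPromotable()
    // and PromoteMemToReg() no longer take a DataLayout parameter.
    static bool promoteAllocasIn(Function &F, DominatorTree &DT) {
      std::vector<AllocaInst *> Allocas;
      BasicBlock &BB = F.getEntryBlock(); // only entry-block allocas are promoted
      bool Changed = false;
      while (1) {
        Allocas.clear();
        // Collect allocas that are safe to promote: used only by direct,
        // non-volatile loads and stores (plus lifetime markers).
        for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
          if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
            if (isAllocaPromotable(AI))
              Allocas.push_back(AI);
        if (Allocas.empty())
          break;
        PromoteMemToReg(Allocas, DT); // rewrite the loads and stores as SSA values
        Changed = true;
      }
      return Changed;
    }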
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 95 ++------ lib/Transforms/Scalar/ScalarReplAggregates.cpp | 2 +- lib/Transforms/Utils/Mem2Reg.cpp | 7 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 262 +++++++++-------------- 4 files changed, 114 insertions(+), 252 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index be3ef6f..da441dc 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -197,18 +197,6 @@ public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); - /// \brief Whether we determined during the trivial analysis of the alloca - /// that it was immediately promotable with mem2reg. - bool isAllocaPromotable() const { return IsAllocaPromotable; } - - /// \brief A list of directly stored values when \c isAllocaPromotable is - /// true. - /// - /// The contents are undefined if the alloca is not trivially promotable. - /// This is used to detect other allocas which should be iterated on when - /// doing direct promotion. - ArrayRef getStoredValues() const { return StoredValues; } - /// \brief Test whether a pointer to the allocation escapes our analysis. /// /// If this is true, the slices are never fully built and should be @@ -265,20 +253,10 @@ private: class SliceBuilder; friend class AllocaSlices::SliceBuilder; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// \brief Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; - - /// \brief A flag indicating if the alloca is trivially promotable. - /// - /// While walking the alloca's uses we track when the uses exceed what - /// mem2reg can trivially handle. This essentially should match the logic in - /// \c isAllocaPromotable but re-using the existing walk of the pointer uses. - bool IsAllocaPromotable; - - /// \brief Storage for stored values. - /// - /// Only used while the alloca is trivially promotable. - SmallVector StoredValues; +#endif /// \brief The instruction responsible for this alloca not having a known set /// of slices. @@ -347,9 +325,9 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { SmallPtrSet VisitedDeadInsts; public: - SliceBuilder(const DataLayout &DL, AllocaSlices &S) + SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S) : PtrUseVisitor(DL), - AllocSize(DL.getTypeAllocSize(S.AI.getAllocatedType())), S(S) {} + AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {} private: void markAsDead(Instruction &I) { @@ -402,15 +380,6 @@ private: if (GEPI.use_empty()) return markAsDead(GEPI); - // FIXME: mem2reg shouldn't care about the nature of the GEP, but instead - // the offsets of the loads. Until then, we short-circuit here for the - // promotable case. - if (GEPI.hasAllZeroIndices()) - return Base::enqueueUsers(GEPI); - - // Otherwise, there is something in the GEP, so we disable mem2reg and - // accumulate it. - S.IsAllocaPromotable = false; return Base::visitGetElementPtrInst(GEPI); } @@ -427,13 +396,6 @@ private: bool IsSplittable = Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; - // mem2reg can only promote non-volatile loads and stores which exactly - // load the alloca (no offset and the right type). 
- if (IsVolatile || Offset != 0 || Ty != S.AI.getAllocatedType()) - S.IsAllocaPromotable = false; - if (S.IsAllocaPromotable) - assert(Offset == 0); - insertUse(I, Offset, Size, IsSplittable); } @@ -474,9 +436,6 @@ private: return markAsDead(SI); } - if (S.IsAllocaPromotable) - S.StoredValues.push_back(ValOp); - assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); @@ -494,8 +453,6 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); - S.IsAllocaPromotable = false; - insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), @@ -512,8 +469,6 @@ private: if (!IsOffsetKnown) return PI.setAborted(&II); - S.IsAllocaPromotable = false; - uint64_t RawOffset = Offset.getLimitedValue(); uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; @@ -574,8 +529,6 @@ private: return; } - S.IsAllocaPromotable = false; - Base::visitIntrinsicInst(II); } @@ -650,8 +603,6 @@ private: return; } - S.IsAllocaPromotable = false; - insertUse(PN, Offset, PHISize); } @@ -659,18 +610,14 @@ private: if (SI.use_empty()) return markAsDead(SI); if (Value *Result = foldSelectInst(SI)) { - if (Result == *U) { + if (Result == *U) // If the result of the constant fold will be the pointer, recurse // through the select as if we had RAUW'ed it. enqueueUsers(SI); - - // FIXME: mem2reg should support this pattern, but it doesn't. - S.IsAllocaPromotable = false; - } else { + else // Otherwise the operand to the select is dead, and we can replace it // with undef. S.DeadOperands.push_back(U); - } return; } @@ -697,8 +644,6 @@ private: return; } - S.IsAllocaPromotable = false; - insertUse(SI, Offset, SelectSize); } @@ -709,8 +654,12 @@ private: }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) - : AI(AI), IsAllocaPromotable(true), PointerEscapingInstr(0) { - SliceBuilder PB(DL, *this); + : +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + AI(AI), +#endif + PointerEscapingInstr(0) { + SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. abort info into the caller nicely, @@ -3390,24 +3339,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (S.begin() == S.end()) return Changed; - // Trivially promotable, don't go through the splitting and rewriting. - if (S.isAllocaPromotable()) { - DEBUG(dbgs() << " Directly promoting alloca: " << AI << "\n"); - PromotableAllocas.push_back(&AI); - - // Walk through the stored values quickly here to handle directly - // promotable allocas that require iterating on other allocas. 
- ArrayRef StoredValues = S.getStoredValues(); - for (ArrayRef::iterator SVI = StoredValues.begin(), - SVE = StoredValues.end(); - SVI != SVE; ++SVI) - if ((*SVI)->getType()->isPointerTy()) - if (AllocaInst *SAI = - dyn_cast((*SVI)->stripInBoundsOffsets())) - PostPromotionWorklist.insert(SAI); - return true; - } - Changed |= splitAlloca(AI, S); DEBUG(dbgs() << " Speculating PHIs\n"); @@ -3478,7 +3409,7 @@ bool SROA::promoteAllocas(Function &F) { if (DT && !ForceSSAUpdater) { DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, DL); + PromoteMemToReg(PromotableAllocas, *DT); PromotableAllocas.clear(); return true; } diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 73b2edf..33bbe15 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1426,7 +1426,7 @@ bool SROA::performPromotion(Function &F) { if (Allocas.empty()) break; if (HasDomTree) - PromoteMemToReg(Allocas, *DT, TD); + PromoteMemToReg(Allocas, *DT); else { SSAUpdater SSA; for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index ebd7db6..61b3965 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -16,7 +16,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -28,7 +27,6 @@ STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { struct PromotePass : public FunctionPass { static char ID; // Pass identification, replacement for typeid - PromotePass() : FunctionPass(ID) { initializePromotePassPass(*PassRegistry::getPassRegistry()); } @@ -64,7 +62,6 @@ bool PromotePass::runOnFunction(Function &F) { bool Changed = false; DominatorTree &DT = getAnalysis(); - const DataLayout *DL = getAnalysisIfAvailable(); while (1) { Allocas.clear(); @@ -73,12 +70,12 @@ bool PromotePass::runOnFunction(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? 
- if (isAllocaPromotable(AI, DL)) + if (isAllocaPromotable(AI)) Allocas.push_back(AI); if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DL); + PromoteMemToReg(Allocas, DT); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 6910180..1b51255 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -46,7 +45,6 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" -#include "llvm/InstVisitor.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -58,16 +56,56 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -namespace { +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + // FIXME: If the memory unit is of pointer or integer type, we can permit + // assignments to subsections of the memory unit. + + // Only allow direct and non-volatile loads and stores... + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { // Loop over all of the uses of the alloca + const User *U = *UI; + if (const LoadInst *LI = dyn_cast(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (LI->isVolatile()) + return false; + } else if (const StoreInst *SI = dyn_cast(U)) { + if (SI->getOperand(0) == AI) + return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. + if (SI->isVolatile()) + return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; + } else { + return false; + } + } -struct AllocaInfo : private InstVisitor { - const DataLayout *DL; + return true; +} + +namespace { +struct AllocaInfo { SmallVector DefiningBlocks; SmallVector UsingBlocks; - SmallVector DeadInsts; - Type *AllocaTy; StoreInst *OnlyStore; BasicBlock *OnlyBlock; bool OnlyUsedInOneBlock; @@ -75,13 +113,9 @@ struct AllocaInfo : private InstVisitor { Value *AllocaPointerVal; DbgDeclareInst *DbgDeclare; - AllocaInfo(const DataLayout *DL) : DL(DL) {} - void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); - DeadInsts.clear(); - AllocaTy = 0; OnlyStore = 0; OnlyBlock = 0; OnlyUsedInOneBlock = true; @@ -91,116 +125,39 @@ struct AllocaInfo : private InstVisitor { /// Scan the uses of the specified alloca, filling in the AllocaInfo used /// by the rest of the pass to reason about the uses of this alloca. 
- bool analyzeAlloca(AllocaInst &AI) { + void AnalyzeAlloca(AllocaInst *AI) { clear(); - AllocaTy = AI.getAllocatedType(); - enqueueUsers(AI); - - // Walk queued up uses in the worklist to handle nested uses. - while (!UseWorklist.empty()) { - U = UseWorklist.pop_back_val(); - Instruction &I = *cast(U->getUser()); - if (!visit(I)) - return false; // Propagate failure to promote up. + // As we scan the uses of the alloca instruction, keep track of stores, + // and decide whether all of the loads and stores to the alloca are within + // the same basic block. + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast(*UI++); + + if (StoreInst *SI = dyn_cast(User)) { + // Remember the basic blocks which define new values for the alloca + DefiningBlocks.push_back(SI->getParent()); + AllocaPointerVal = SI->getOperand(0); + OnlyStore = SI; + } else { + LoadInst *LI = cast(User); + // Otherwise it must be a load instruction, keep track of variable + // reads. + UsingBlocks.push_back(LI->getParent()); + AllocaPointerVal = LI; + } if (OnlyUsedInOneBlock) { if (OnlyBlock == 0) - OnlyBlock = I.getParent(); - else if (OnlyBlock != I.getParent()) + OnlyBlock = User->getParent(); + else if (OnlyBlock != User->getParent()) OnlyUsedInOneBlock = false; } } - DbgDeclare = FindAllocaDbgDeclare(&AI); - return true; - } - -private: - // Befriend the base class so it can call through private visitor methods. - friend class InstVisitor; - - /// \brief A use pointer that is non-null when visiting uses. - Use *U; - - /// \brief A worklist for recursively visiting all uses of an alloca. - SmallVector UseWorklist; - - /// \brief A set for preventing cyclic visitation. - SmallPtrSet VisitedUses; - - void enqueueUsers(Instruction &I) { - for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; - ++UI) - if (VisitedUses.insert(&UI.getUse())) - UseWorklist.push_back(&UI.getUse()); - } - - bool visitLoadInst(LoadInst &LI) { - if (LI.isVolatile() || LI.getType() != AllocaTy) - return false; - - // Keep track of variable reads. - UsingBlocks.push_back(LI.getParent()); - AllocaPointerVal = &LI; - return true; - } - - bool visitStoreInst(StoreInst &SI) { - if (SI.isVolatile() || SI.getValueOperand() == U->get() || - SI.getValueOperand()->getType() != AllocaTy) - return false; - - // Remember the basic blocks which define new values for the alloca - DefiningBlocks.push_back(SI.getParent()); - AllocaPointerVal = SI.getOperand(0); - OnlyStore = &SI; - return true; - } - - bool visitBitCastInst(BitCastInst &BC) { - if (BC.use_empty()) - DeadInsts.push_back(&BC); - else - enqueueUsers(BC); - return true; + DbgDeclare = FindAllocaDbgDeclare(AI); } - - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - if (GEPI.use_empty()) { - DeadInsts.push_back(&GEPI); - return true; - } - - enqueueUsers(GEPI); - - return GEPI.hasAllZeroIndices(); - } - - // We can promote through debug info intrinsics as they don't alter the - // value stored in memory. - bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { - DeadInsts.push_back(&I); - return true; - } - - bool visitIntrinsicInst(IntrinsicInst &II) { - switch (II.getIntrinsicID()) { - default: - return false; - - // Lifetime intrinsics don't preclude promoting the memory to a register. - // FIXME: We should use these to promote to undef when outside of a valid - // lifetime. 
- case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - DeadInsts.push_back(&II); - return true; - } - } - - // The fallback is that the alloca cannot be promoted. - bool visitInstruction(Instruction &I) { return false; } }; // Data package used by RenamePass() @@ -278,7 +235,6 @@ struct PromoteMem2Reg { std::vector Allocas; DominatorTree &DT; DIBuilder DIB; - const DataLayout *DL; /// An AliasSetTracker object to update. If null, don't update it. AliasSetTracker *AST; @@ -324,9 +280,9 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, - const DataLayout *DL, AliasSetTracker *AST) + AliasSetTracker *AST) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), - DIB(*DT.getRoot()->getParent()->getParent()), DL(DL), AST(AST) {} + DIB(*DT.getRoot()->getParent()->getParent()), AST(AST) {} void run(); @@ -357,39 +313,27 @@ private: } // end of anonymous namespace -/// \brief Walk a small vector of dead instructions and recursively remove them -/// and subsequently dead instructions. -/// -/// This is only valid to call on dead instructions using an alloca which is -/// promotable, as we leverage that assumption to delete them faster. -static void removeDeadInstructions(AllocaInst *AI, - SmallVectorImpl &DeadInsts) { - while (!DeadInsts.empty()) { - Instruction *I = DeadInsts.pop_back_val(); - - // Don't delete the alloca itself. - if (I == AI) - continue; - - // Note that we open code the deletion algorithm here because we know - // apriori that all of the instructions using an alloca that reaches here - // are trivially dead when their use list becomes empty (The only risk are - // lifetime markers which we specifically want to nuke). By coding it here - // we can skip the triviality test and be more efficient. - // - // Null out all of the instruction's operands to see if any operand becomes - // dead as we go. - for (User::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; - ++OI) { - Instruction *Op = dyn_cast(*OI); - if (!Op) - continue; +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. - OI->set(0); - if (!Op->use_empty()) - continue; + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast(*UI); + ++UI; + if (isa(I) || isa(I)) + continue; - DeadInsts.push_back(Op); + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast(*UI); + ++UI; + Inst->eraseFromParent(); + } } I->eraseFromParent(); } @@ -590,23 +534,17 @@ void PromoteMem2Reg::run() { PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); - AllocaInfo Info(DL); + AllocaInfo Info; LargeBlockInfo LBI; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; + assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - // Calculate the set of read and write-locations for each alloca. This is - // analogous to finding the 'uses' and 'definitions' of each variable. 
- bool Good = Info.analyzeAlloca(*AI); - (void)Good; - assert(Good && "Cannot promote non-promotable alloca!"); - - // Nuke all of the dead instructions. - removeDeadInstructions(AI, Info.DeadInsts); + removeLifetimeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. @@ -620,6 +558,10 @@ void PromoteMem2Reg::run() { continue; } + // Calculate the set of read and write-locations for each alloca. This is + // analogous to finding the 'uses' and 'definitions' of each variable. + Info.AnalyzeAlloca(AI); + // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { @@ -1145,19 +1087,11 @@ NextIteration: goto NextIteration; } -bool llvm::isAllocaPromotable(const AllocaInst *AI, const DataLayout *DL) { - // We cast away constness because we re-use the non-const analysis that the - // actual promotion routine uses. While it is non-const, it doesn't actually - // mutate anything at this phase, and we discard the non-const results that - // promotion uses to mutate the alloca. - return AllocaInfo(DL).analyzeAlloca(*const_cast(AI)); -} - void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - const DataLayout *DL, AliasSetTracker *AST) { + AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DL, AST).run(); + PromoteMem2Reg(Allocas, DT, AST).run(); } -- cgit v1.1 From eaa8f5533f9f678fe3c56aec0201a34e46eaaf54 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 13 Aug 2013 23:34:32 +0000 Subject: BBVectorize: Add initial stores to the write set when tracking uses When computing the use set of a store, we need to add the store to the write set prior to iterating over later instructions. Otherwise, if there is a later aliasing load of that store, that load will not be tagged as a use, and bad things will happen. trackUsesOfI still adds later dependent stores of an instruction to that instruction's write set, but it never sees the original instruction, and so when tracking uses of a store, the store must be added to the write set by the caller. Fixes PR16834. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index cbc1d63..83ee1a4 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1182,6 +1182,8 @@ namespace { // Look for an instruction with which to pair instruction *I... 
DenseSet Users; AliasSetTracker WriteSet(*AA); + if (I->mayWriteToMemory()) WriteSet.add(I); + bool JAfterStart = IAfterStart; BasicBlock::iterator J = llvm::next(I); for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { @@ -1403,6 +1405,8 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); + if (I->mayWriteToMemory()) WriteSet.add(I); + for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) { (void) trackUsesOfI(Users, WriteSet, I, J); @@ -2804,6 +2808,8 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); + if (I->mayWriteToMemory()) WriteSet.add(I); + for (; cast(L) != J; ++L) (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); @@ -2824,6 +2830,8 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); + if (I->mayWriteToMemory()) WriteSet.add(I); + for (; cast(L) != J;) { if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { // Move this instruction @@ -2853,6 +2861,7 @@ namespace { DenseSet Users; AliasSetTracker WriteSet(*AA); + if (I->mayWriteToMemory()) WriteSet.add(I); // Note: We cannot end the loop when we reach J because J could be moved // farther down the use chain by another instruction pairing. Also, J -- cgit v1.1 From c2a484164d6d254db423c6192f32f73bb9d022ef Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Aug 2013 00:24:05 +0000 Subject: Use array initializer, space around operator git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188337 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e2d7966..0034ddf 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -180,12 +180,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... 
// - Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext())); - Value *Idx[2]; - Idx[0] = NullIdx; - Idx[1] = NullIdx; + Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext())); + Value *Idx[2] = { NullIdx, NullIdx }; Instruction *GEP = - GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub"); + GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original -- cgit v1.1 From 3ea117e1bca290c4043ca38d25f278275e5853cb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Aug 2013 00:24:34 +0000 Subject: Use type helper functions instead of cast git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 8 +++----- lib/Transforms/InstCombine/InstructionCombining.cpp | 11 +++++------ 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index e2f64d7..d31f845 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1379,8 +1379,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { GEP->accumulateConstantOffset(*TD, Offset)) { // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); - Type *GEPIdxTy = - cast(OrigBase->getType())->getElementType(); + Type *GEPIdxTy = OrigBase->getType()->getPointerElementType(); SmallVector NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { // If we were able to index down into an element, create the GEP @@ -1797,10 +1796,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. 
if (SVI->hasOneUse() && DestTy->isVectorTy() && - cast(DestTy)->getNumElements() == - SVI->getType()->getNumElements() && + DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == - cast(SVI->getOperand(0)->getType())->getNumElements()) { + SVI->getOperand(0)->getType()->getVectorNumElements()) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index b34ae21..768f466 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1231,9 +1231,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast Type *SrcElTy = StrippedPtrTy->getElementType(); - Type *ResElTy=cast(PtrOp->getType())->getElementType(); + Type *ResElTy = PtrOp->getType()->getPointerElementType(); if (TD && SrcElTy->isArrayTy() && - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()) == + TD->getTypeAllocSize(SrcElTy->getArrayElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); @@ -1287,8 +1287,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Check that changing to the array element type amounts to dividing the // index by a scale factor. uint64_t ResSize = TD->getTypeAllocSize(ResElTy); - uint64_t ArrayEltSize = - TD->getTypeAllocSize(cast(SrcElTy)->getElementType()); + uint64_t ArrayEltSize + = TD->getTypeAllocSize(SrcElTy->getArrayElementType()); if (ResSize && ArrayEltSize % ResSize == 0) { Value *Idx = GEP.getOperand(1); unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1354,8 +1354,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector NewIndices; - Type *InTy = - cast(BCI->getOperand(0)->getType())->getElementType(); + Type *InTy = BCI->getOperand(0)->getType()->getPointerElementType(); if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : -- cgit v1.1 From fdc2660214265e8c32b9536a18ff983f035aaf02 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Aug 2013 00:24:38 +0000 Subject: Fix always creating GEP with i32 indices Use the pointer size if datalayout is available. Use i64 if it's not, which is consistent with what other places do when the pointer size is unknown. The test doesn't really test this in a useful way since it will be transformed to that later anyway, but this now tests it for non-zero arrays and when datalayout isn't available. The cases in visitGetElementPtrInst should save an extra re-visit to the newly created GEP since it won't need to cleanup after itself. 
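To make the rule concrete, here is a minimal sketch (not part of the patch; the helper name getPreferredIndexType is invented) of the index-type selection that the hunks below apply:

    // Sketch only: choose a GEP index type the way this change does. Prefer
    // the target's pointer-sized integer when DataLayout is available, and
    // fall back to i64 (not i32) when it is not.
    static Type *getPreferredIndexType(const DataLayout *TD, LLVMContext &Ctx) {
      return TD ? TD->getIntPtrType(Ctx) : Type::getInt64Ty(Ctx);
    }

    // Usage, e.g. when building the zero indices for the alloca GEP in
    // visitAllocaInst below:
    //   Type *IdxTy = getPreferredIndexType(TD, AI.getContext());
    //   Value *NullIdx = Constant::getNullValue(IdxTy);
    //   Value *Idx[2] = { NullIdx, NullIdx };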
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188339 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 13 +++++++++---- lib/Transforms/InstCombine/InstructionCombining.cpp | 13 +++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0034ddf..58c61e7d 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -180,7 +180,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext())); + Type *IdxTy = TD + ? TD->getIntPtrType(AI.getContext()) + : Type::getInt64Ty(AI.getContext()); + Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = { NullIdx, NullIdx }; Instruction *GEP = GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); @@ -300,9 +303,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (ArrayType *ASrcTy = dyn_cast(SrcPTy)) if (Constant *CSrc = dyn_cast(CastOp)) if (ASrcTy->getNumElements() != 0) { - Value *Idxs[2]; - Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext())); - Idxs[1] = Idxs[0]; + Type *IdxTy = TD + ? TD->getIntPtrType(LI.getContext()) + : Type::getInt64Ty(LI.getContext()); + Value *Idx = Constant::getNullValue(IdxTy); + Value *Idxs[2] = { Idx, Idx }; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast(CastOp->getType()); SrcPTy = SrcTy->getElementType(); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 768f466..248c349 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1235,9 +1235,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(SrcElTy->getArrayElementType()) == TD->getTypeAllocSize(ResElTy)) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Idx[1] = GEP.getOperand(1); + Type *IdxType = TD->getIntPtrType(GEP.getContext()); + Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Idx, GEP.getName()); @@ -1304,9 +1303,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Successfully decomposed Idx as NewIdx * Scale, form a new GEP. // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". - Value *Off[2]; - Off[0] = Constant::getNullValue(Type::getInt32Ty(GEP.getContext())); - Off[1] = NewIdx; + Value *Off[2] = { + Constant::getNullValue(TD->getIntPtrType(GEP.getContext())), + NewIdx + }; + Value *NewGEP = GEP.isInBounds() && NSW ? Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : Builder->CreateGEP(StrippedPtr, Off, GEP.getName()); -- cgit v1.1 From 79663c1910ebf9bda8f758388eaa3171fb9a5134 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 14 Aug 2013 08:56:41 +0000 Subject: Fix a really terrifying but improbable bug in mem2reg. 
If you have seen extremely subtle miscompilations (such as a load getting replaced with the value stored *below* the load within a basic block) related to promoting an alloca to an SSA value, there is the dim possibility that you hit this. Please let me know if you won this unfortunate lottery. The first half of mem2reg's core logic (as it is used both in the standalone mem2reg pass and in SROA) builds up a mapping from 'Instruction *' to the index of that instruction within its basic block. This allows quickly establishing which stores dominate a particular load even for large basic blocks. We cache this information throughout the run of mem2reg over a function in order to amortize the cost of computing it. This is not in and of itself a strange pattern in LLVM. However, it introduces a very important constraint: absolutely no instruction can be deleted from the program without updating the mapping. Otherwise a newly allocated instruction might get the same pointer address, and then end up with a wrong index. Yes, LLVM routinely suffers from a *single-threaded* variant of the ABA problem. Most places in LLVM don't find avoiding this an imposition because they don't both delete and create new instructions iteratively, but mem2reg *loves* to do this... All the time. Fortunately, the mem2reg code was really careful about updating this cache to handle this eventuality... except when it comes to the debug declare intrinsic. Oops. The fix is to invalidate that pointer in the cache when we delete it, the same as we do when deleting alloca instructions and other instructions. I've also caused the same bug in new code while working on a fix to PR16867, so this seems to be a really unfortunate pattern. Hopefully in subsequent patches the deletion of dead instructions can be consolidated sufficiently to make it less likely that we'll see future occurrences of this bug. Sorry for not having a test case, but I have literally no idea how to reliably trigger this kind of thing. It may be single-threaded, but it remains an ABA problem. It would require a really amazing number of stars to align. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188367 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 1b51255..368b1fa 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -418,6 +418,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, DIBuilder DIB(*AI->getParent()->getParent()->getParent()); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); + LBI.deleteValue(DDI); } // Remove the (now dead) store and alloca. Info.OnlyStore->eraseFromParent(); @@ -521,8 +522,10 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LBI.deleteValue(AI); // The alloca's debuginfo can be removed as well. - if (DbgDeclareInst *DDI = Info.DbgDeclare) + if (DbgDeclareInst *DDI = Info.DbgDeclare) { DDI->eraseFromParent(); + LBI.deleteValue(DDI); + } ++NumLocalPromoted; } -- cgit v1.1 From fdb1a6c341c0e289f3f900cdab87f831262c0e93 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 14 Aug 2013 18:54:12 +0000 Subject: DataFlowSanitizer: greylist is now ABI list.
This replaces the old incomplete greylist functionality with an ABI list, which can provide more detailed information about the ABI and semantics of specific functions. The pass treats every function in the "uninstrumented" category in the ABI list file as conforming to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains additional categories for those functions, a call to one of those functions will produce a warning message, as the labelling behaviour of the function is unknown. The other supported categories are "functional", "discard" and "custom". - "discard" -- This function does not write to (user-accessible) memory, and its return value is unlabelled. - "functional" -- This function does not write to (user-accessible) memory, and the label of its return value is the union of the label of its arguments. - "custom" -- Instead of calling the function, a custom wrapper __dfsw_F is called, where F is the name of the function. This function may wrap the original function or provide its own implementation. Differential Revision: http://llvm-reviews.chandlerc.com/D1345 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188402 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 367 +++++++++++++++------ 1 file changed, 271 insertions(+), 96 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 29413d5..dd01d83 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -76,17 +76,20 @@ static cl::opt ClPreserveAlignment( cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false)); -// The greylist file controls how shadow parameters are passed. -// The program acts as though every function in the greylist is passed -// parameters with zero shadow and that its return value also has zero shadow. -// This avoids the use of TLS or extra function parameters to pass shadow state -// and essentially makes the function conform to the "native" (i.e. unsanitized) -// ABI. -static cl::opt ClGreylistFile( - "dfsan-greylist", - cl::desc("File containing the list of functions with a native ABI"), +// The ABI list file controls how shadow parameters are passed. The pass treats +// every function labelled "uninstrumented" in the ABI list file as conforming +// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains +// additional annotations for those functions, a call to one of those functions +// will produce a warning message, as the labelling behaviour of the function is +// unknown. The other supported annotations are "functional" and "discard", +// which are described below under DataFlowSanitizer::WrapperKind. +static cl::opt ClABIListFile( + "dfsan-abilist", + cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden); +// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented +// functions (see DataFlowSanitizer::InstrumentedABI below). static cl::opt ClArgsABI( "dfsan-args-abi", cl::desc("Use the argument ABI rather than the TLS ABI"), @@ -102,13 +105,42 @@ class DataFlowSanitizer : public ModulePass { ShadowWidth = 16 }; + /// Which ABI should be used for instrumented functions? enum InstrumentedABI { - IA_None, - IA_MemOnly, + /// Argument and return value labels are passed through additional + /// arguments and by modifying the return type. 
IA_Args, + + /// Argument and return value labels are passed through TLS variables + /// __dfsan_arg_tls and __dfsan_retval_tls. IA_TLS }; + /// How should calls to uninstrumented functions be handled? + enum WrapperKind { + /// This function is present in an uninstrumented form but we don't know + /// how it should be handled. Print a warning and call the function anyway. + /// Don't label the return value. + WK_Warning, + + /// This function does not write to (user-accessible) memory, and its return + /// value is unlabelled. + WK_Discard, + + /// This function does not write to (user-accessible) memory, and the label + /// of its return value is the union of the label of its arguments. + WK_Functional, + + /// Instead of calling the function, a custom wrapper __dfsw_F is called, + /// where F is the name of the function. This function may wrap the + /// original function or provide its own implementation. This is similar to + /// the IA_Args ABI, except that IA_Args uses a struct return type to + /// pass the return value shadow in a register, while WK_Custom uses an + /// extra pointer argument to return the shadow. This allows the wrapped + /// form of the function type to be expressed in C. + WK_Custom + }; + DataLayout *DL; Module *Mod; LLVMContext *Ctx; @@ -126,20 +158,26 @@ class DataFlowSanitizer : public ModulePass { Constant *GetRetvalTLS; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; + FunctionType *DFSanUnimplementedFnTy; Constant *DFSanUnionFn; Constant *DFSanUnionLoadFn; + Constant *DFSanUnimplementedFn; MDNode *ColdCallWeights; - OwningPtr Greylist; + OwningPtr ABIList; DenseMap UnwrappedFnMap; + AttributeSet ReadOnlyNoneAttrs; Value *getShadowAddress(Value *Addr, Instruction *Pos); Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); - FunctionType *getInstrumentedFunctionType(FunctionType *T); - InstrumentedABI getInstrumentedABI(Function *F); - InstrumentedABI getDefaultInstrumentedABI(); + bool isInstrumented(Function *F); + FunctionType *getArgsFunctionType(FunctionType *T); + FunctionType *getCustomFunctionType(FunctionType *T); + InstrumentedABI getInstrumentedABI(); + WrapperKind getWrapperKind(Function *F); public: - DataFlowSanitizer(void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); + DataFlowSanitizer(StringRef ABIListFile = StringRef(), + void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); static char ID; bool doInitialization(Module &M); bool runOnModule(Module &M); @@ -149,16 +187,19 @@ struct DFSanFunction { DataFlowSanitizer &DFS; Function *F; DataFlowSanitizer::InstrumentedABI IA; + bool IsNativeABI; Value *ArgTLSPtr; Value *RetvalTLSPtr; + AllocaInst *LabelReturnAlloca; DenseMap ValShadowMap; DenseMap AllocaShadowMap; std::vector > PHIFixups; DenseSet SkipInsts; - DFSanFunction(DataFlowSanitizer &DFS, Function *F) - : DFS(DFS), F(F), IA(DFS.getInstrumentedABI(F)), ArgTLSPtr(0), - RetvalTLSPtr(0) {} + DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) + : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), + IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0), + LabelReturnAlloca(0) {} Value *getArgTLSPtr(); Value *getArgTLS(unsigned Index, Instruction *Pos); Value *getRetvalTLS(); @@ -203,17 +244,21 @@ char DataFlowSanitizer::ID; INITIALIZE_PASS(DataFlowSanitizer, "dfsan", "DataFlowSanitizer: dynamic data flow analysis.", false, false) -ModulePass *llvm::createDataFlowSanitizerPass(void *(*getArgTLS)(), +ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile, + void *(*getArgTLS)(), void 
*(*getRetValTLS)()) { - return new DataFlowSanitizer(getArgTLS, getRetValTLS); + return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS); } -DataFlowSanitizer::DataFlowSanitizer(void *(*getArgTLS)(), +DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile, + void *(*getArgTLS)(), void *(*getRetValTLS)()) : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), - Greylist(SpecialCaseList::createOrDie(ClGreylistFile)) {} + ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile + : ABIListFile)) { +} -FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { +FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { llvm::SmallVector ArgTypes; std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) @@ -226,6 +271,18 @@ FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } +FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { + assert(!T->isVarArg()); + llvm::SmallVector ArgTypes; + std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return FunctionType::get(T->getReturnType(), ArgTypes, false); +} + bool DataFlowSanitizer::doInitialization(Module &M) { DL = getAnalysisIfAvailable(); if (!DL) @@ -246,6 +303,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) { Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy }; DFSanUnionLoadFnTy = FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); + DFSanUnimplementedFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); @@ -267,23 +326,32 @@ bool DataFlowSanitizer::doInitialization(Module &M) { return true; } -DataFlowSanitizer::InstrumentedABI -DataFlowSanitizer::getInstrumentedABI(Function *F) { - if (Greylist->isIn(*F)) - return IA_MemOnly; - else - return getDefaultInstrumentedABI(); +bool DataFlowSanitizer::isInstrumented(Function *F) { + return !ABIList->isIn(*F, "uninstrumented"); } -DataFlowSanitizer::InstrumentedABI -DataFlowSanitizer::getDefaultInstrumentedABI() { +DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { return ClArgsABI ? 
IA_Args : IA_TLS; } +DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { + if (ABIList->isIn(*F, "functional")) + return WK_Functional; + if (ABIList->isIn(*F, "discard")) + return WK_Discard; + if (ABIList->isIn(*F, "custom")) + return WK_Custom; + + return WK_Warning; +} + bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; + if (ABIList->isIn(M, "skip")) + return false; + if (!GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); @@ -308,33 +376,44 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Function *F = dyn_cast(DFSanUnionLoadFn)) { F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); } + DFSanUnimplementedFn = + Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); std::vector FnsToInstrument; + llvm::SmallPtrSet FnsWithNativeABI; for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { - if (!i->isIntrinsic() && i != DFSanUnionFn && i != DFSanUnionLoadFn) + if (!i->isIntrinsic() && + i != DFSanUnionFn && + i != DFSanUnionLoadFn && + i != DFSanUnimplementedFn) FnsToInstrument.push_back(&*i); } - // First, change the ABI of every function in the module. Greylisted + AttrBuilder B; + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B); + + // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. for (std::vector::iterator i = FnsToInstrument.begin(), e = FnsToInstrument.end(); i != e; ++i) { Function &F = **i; - FunctionType *FT = F.getFunctionType(); - FunctionType *NewFT = getInstrumentedFunctionType(FT); - // If the function types are the same (i.e. void()), we don't need to do - // anything here. 
- if (FT != NewFT) { - switch (getInstrumentedABI(&F)) { - case IA_Args: { + + if (FT->getNumParams() == 0 && !FT->isVarArg() && + FT->getReturnType()->isVoidTy()) + continue; + + if (isInstrumented(&F)) { + if (getInstrumentedABI() == IA_Args) { + FunctionType *NewFT = getArgsFunctionType(FT); Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); - NewF->setCallingConv(F.getCallingConv()); - NewF->setAttributes(F.getAttributes().removeAttributes( - *Ctx, AttributeSet::ReturnIndex, + NewF->copyAttributesFrom(&F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, AttributeFuncs::typeIncompatible(NewFT->getReturnType(), - AttributeSet::ReturnIndex))); + AttributeSet::ReturnIndex)); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -358,41 +437,63 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->takeName(&F); F.eraseFromParent(); *i = NewF; - break; } - case IA_MemOnly: { - assert(!FT->isVarArg() && "varargs not handled here yet"); - assert(getDefaultInstrumentedABI() == IA_Args); - Function *NewF = - Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage, - std::string("dfsw$") + F.getName(), &M); - NewF->setCallingConv(F.getCallingConv()); - NewF->setAttributes(F.getAttributes()); - - BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); - std::vector Args; - unsigned n = FT->getNumParams(); - for (Function::arg_iterator i = NewF->arg_begin(); n != 0; ++i, --n) - Args.push_back(&*i); - CallInst *CI = CallInst::Create(&F, Args, "", BB); - if (FT->getReturnType()->isVoidTy()) - ReturnInst::Create(*Ctx, BB); - else { - Value *InsVal = InsertValueInst::Create( - UndefValue::get(NewFT->getReturnType()), CI, 0, "", BB); - Value *InsShadow = - InsertValueInst::Create(InsVal, ZeroShadow, 1, "", BB); - ReturnInst::Create(*Ctx, InsShadow, BB); - } - - Value *WrappedFnCst = - ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); - F.replaceAllUsesWith(WrappedFnCst); - UnwrappedFnMap[WrappedFnCst] = &F; - break; - } - default: - break; + // Hopefully, nobody will try to indirectly call a vararg + // function... yet. + } else if (FT->isVarArg()) { + UnwrappedFnMap[&F] = &F; + *i = 0; + } else { + // Build a wrapper function for F. The wrapper simply calls F, and is + // added to FnsToInstrument so that any instrumentation according to its + // WrapperKind is done in the second pass below. + FunctionType *NewFT = getInstrumentedABI() == IA_Args + ? 
getArgsFunctionType(FT) + : FT; + Function *NewF = + Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage, + std::string("dfsw$") + F.getName(), &M); + NewF->copyAttributesFrom(&F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex)); + if (getInstrumentedABI() == IA_TLS) + NewF->removeAttributes(AttributeSet::FunctionIndex, + ReadOnlyNoneAttrs); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); + std::vector Args; + unsigned n = FT->getNumParams(); + for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) + Args.push_back(&*ai); + CallInst *CI = CallInst::Create(&F, Args, "", BB); + if (FT->getReturnType()->isVoidTy()) + ReturnInst::Create(*Ctx, BB); + else + ReturnInst::Create(*Ctx, CI, BB); + + Value *WrappedFnCst = + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); + F.replaceAllUsesWith(WrappedFnCst); + UnwrappedFnMap[WrappedFnCst] = &F; + *i = NewF; + + if (!F.isDeclaration()) { + // This function is probably defining an interposition of an + // uninstrumented function and hence needs to keep the original ABI. + // But any functions it may call need to use the instrumented ABI, so + // we instrument it in a mode which preserves the original ABI. + FnsWithNativeABI.insert(&F); + + // This code needs to rebuild the iterators, as they may be invalidated + // by the push_back, taking care that the new range does not include + // any functions added by this code. + size_t N = i - FnsToInstrument.begin(), + Count = e - FnsToInstrument.begin(); + FnsToInstrument.push_back(&F); + i = FnsToInstrument.begin() + N; + e = FnsToInstrument.begin() + Count; } } } @@ -400,12 +501,12 @@ bool DataFlowSanitizer::runOnModule(Module &M) { for (std::vector::iterator i = FnsToInstrument.begin(), e = FnsToInstrument.end(); i != e; ++i) { - if ((*i)->isDeclaration()) + if (!*i || (*i)->isDeclaration()) continue; removeUnreachableBlocks(**i); - DFSanFunction DFSF(*this, *i); + DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i)); // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. @@ -433,6 +534,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } + // We will not necessarily be able to compute the shadow for every phi node + // until we have visited every block. Therefore, the code that handles phi + // nodes adds them to the PHIFixups list so that they can be properly + // handled here. 
for (std::vector >::iterator i = DFSF.PHIFixups.begin(), e = DFSF.PHIFixups.end(); @@ -479,6 +584,8 @@ Value *DFSanFunction::getShadow(Value *V) { Value *&Shadow = ValShadowMap[V]; if (!Shadow) { if (Argument *A = dyn_cast(V)) { + if (IsNativeABI) + return DFS.ZeroShadow; switch (IA) { case DataFlowSanitizer::IA_TLS: { Value *ArgTLSPtr = getArgTLSPtr(); @@ -495,11 +602,9 @@ Value *DFSanFunction::getShadow(Value *V) { while (ArgIdx--) ++i; Shadow = i; + assert(Shadow->getType() == DFS.ShadowTy); break; } - default: - Shadow = DFS.ZeroShadow; - break; } } else { Shadow = DFS.ZeroShadow; @@ -866,7 +971,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { } void DFSanVisitor::visitReturnInst(ReturnInst &RI) { - if (RI.getReturnValue()) { + if (!DFSF.IsNativeABI && RI.getReturnValue()) { switch (DFSF.IA) { case DataFlowSanitizer::IA_TLS: { Value *S = DFSF.getShadow(RI.getReturnValue()); @@ -884,8 +989,6 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) { RI.setOperand(0, InsShadow); break; } - default: - break; } } } @@ -897,19 +1000,91 @@ void DFSanVisitor::visitCallSite(CallSite CS) { return; } + IRBuilder<> IRB(CS.getInstruction()); + DenseMap::iterator i = DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue()); if (i != DFSF.DFS.UnwrappedFnMap.end()) { - CS.setCalledFunction(i->second); - DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); - return; - } + Function *F = i->second; + switch (DFSF.DFS.getWrapperKind(F)) { + case DataFlowSanitizer::WK_Warning: { + CS.setCalledFunction(F); + IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, + IRB.CreateGlobalStringPtr(F->getName())); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Discard: { + CS.setCalledFunction(F); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Functional: { + CS.setCalledFunction(F); + visitOperandShadowInst(*CS.getInstruction()); + return; + } + case DataFlowSanitizer::WK_Custom: { + // Don't try to handle invokes of custom functions, it's too complicated. + // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ + // wrapper. + if (CallInst *CI = dyn_cast(CS.getInstruction())) { + FunctionType *FT = F->getFunctionType(); + FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT); + std::string CustomFName = "__dfsw_"; + CustomFName += F->getName(); + Constant *CustomF = + DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT); + if (Function *CustomFn = dyn_cast(CustomF)) { + CustomFn->copyAttributesFrom(F); + + // Custom functions returning non-void will write to the return label. 
+ if (!FT->getReturnType()->isVoidTy()) { + CustomFn->removeAttributes(AttributeSet::FunctionIndex, + DFSF.DFS.ReadOnlyNoneAttrs); + } + } - IRBuilder<> IRB(CS.getInstruction()); + std::vector Args; + + CallSite::arg_iterator i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(*i); + + i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getShadow(*i)); + + if (!FT->getReturnType()->isVoidTy()) { + if (!DFSF.LabelReturnAlloca) { + DFSF.LabelReturnAlloca = + new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", + DFSF.F->getEntryBlock().begin()); + } + Args.push_back(DFSF.LabelReturnAlloca); + } + + CallInst *CustomCI = IRB.CreateCall(CustomF, Args); + CustomCI->setCallingConv(CI->getCallingConv()); + CustomCI->setAttributes(CI->getAttributes()); + + if (!FT->getReturnType()->isVoidTy()) { + LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); + DFSF.setShadow(CustomCI, LabelLoad); + } + + CI->replaceAllUsesWith(CustomCI); + CI->eraseFromParent(); + return; + } + break; + } + } + } FunctionType *FT = cast( CS.getCalledValue()->getType()->getPointerElementType()); - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)), DFSF.getArgTLS(i, CS.getInstruction())); @@ -930,7 +1105,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Next = CS->getNextNode(); } - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { IRBuilder<> NextIRB(Next); LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS()); DFSF.SkipInsts.insert(LI); @@ -940,8 +1115,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { // Do all instrumentation for IA_Args down here to defer tampering with the // CFG in a way that SplitEdge may be able to detect. - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_Args) { - FunctionType *NewFT = DFSF.DFS.getInstrumentedFunctionType(FT); + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { + FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); Value *Func = IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT)); std::vector Args; -- cgit v1.1 From ef8136dda15f6b57b8633cdcc0e2b78a944e2b5d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 14 Aug 2013 20:51:38 +0000 Subject: DataFlowSanitizer: Instrumentation for memset. 
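[Editorial aside on the r188402 ABI list commit above, before the memset change is described.] A sketch of how the ABI list and a WK_Custom wrapper fit together. The entry syntax follows the SpecialCaseList "section:name=category" convention, and the wrapper shape follows the commit: original arguments first, then one label per argument, then a pointer out-parameter for the return label (which is what lets the wrapped type be written in plain C, unlike the IA_Args struct return). The choice of memcmp, the dfsan_label typedef, and the zero-label body are illustrative assumptions, not the runtime's actual declarations:

    // ABI list file (illustrative entries):
    //   fun:memcmp=uninstrumented
    //   fun:memcmp=custom

    #include <cstddef>
    #include <cstring>

    typedef unsigned short dfsan_label;  // assumed; matches ShadowWidth = 16

    // Called in place of memcmp as __dfsw_memcmp per the WK_Custom scheme.
    extern "C" int __dfsw_memcmp(const void *a, const void *b, std::size_t n,
                                 dfsan_label a_label, dfsan_label b_label,
                                 dfsan_label n_label, dfsan_label *ret_label) {
      // A real wrapper would union the labels of the bytes actually compared;
      // conservatively returning the zero label drops taint instead.
      *ret_label = 0;
      return std::memcmp(a, b, n);
    }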
Differential Revision: http://llvm-reviews.chandlerc.com/D1395 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188412 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index dd01d83..b645ab8 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -159,9 +159,11 @@ class DataFlowSanitizer : public ModulePass { FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanUnimplementedFnTy; + FunctionType *DFSanSetLabelFnTy; Constant *DFSanUnionFn; Constant *DFSanUnionLoadFn; Constant *DFSanUnimplementedFn; + Constant *DFSanSetLabelFn; MDNode *ColdCallWeights; OwningPtr ABIList; DenseMap UnwrappedFnMap; @@ -235,6 +237,7 @@ class DFSanVisitor : public InstVisitor { void visitInsertValueInst(InsertValueInst &I); void visitAllocaInst(AllocaInst &I); void visitSelectInst(SelectInst &I); + void visitMemSetInst(MemSetInst &I); void visitMemTransferInst(MemTransferInst &I); }; @@ -305,6 +308,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) { FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); DFSanUnimplementedFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy }; + DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), + DFSanSetLabelArgs, /*isVarArg=*/false); if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); @@ -378,6 +384,11 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } DFSanUnimplementedFn = Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); + DFSanSetLabelFn = + Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); + if (Function *F = dyn_cast(DFSanSetLabelFn)) { + F->addAttribute(1, Attribute::ZExt); + } std::vector FnsToInstrument; llvm::SmallPtrSet FnsWithNativeABI; @@ -385,7 +396,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (!i->isIntrinsic() && i != DFSanUnionFn && i != DFSanUnionLoadFn && - i != DFSanUnimplementedFn) + i != DFSanUnimplementedFn && + i != DFSanSetLabelFn) FnsToInstrument.push_back(&*i); } @@ -947,6 +959,15 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) { } } +void DFSanVisitor::visitMemSetInst(MemSetInst &I) { + IRBuilder<> IRB(&I); + Value *ValShadow = DFSF.getShadow(I.getValue()); + IRB.CreateCall3( + DFSF.DFS.DFSanSetLabelFn, ValShadow, + IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)), + IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)); +} + void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { IRBuilder<> IRB(&I); Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); -- cgit v1.1 From 1b6e10f53bec0cd261924734bd5eb58c75c8f550 Mon Sep 17 00:00:00 2001 From: Mark Lacey Date: Wed, 14 Aug 2013 22:11:42 +0000 Subject: Fix small typo: s/succ/Succ/ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/Local.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 4db3a72..f2fac5e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ 
-536,7 +536,7 @@ static bool CanMergeValues(Value *First, Value *Second) { } /// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an -/// almost-empty BB ending in an unconditional branch to Succ, into succ. +/// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. /// -- cgit v1.1 From a77d9f726a7e3c51f04d1d74d091ae1a87d63544 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 15 Aug 2013 18:51:12 +0000 Subject: DataFlowSanitizer: Add a debugging feature to help us track nonzero labels. Summary: When the -dfsan-debug-nonzero-labels parameter is supplied, the code is instrumented such that when a call parameter, return value or load produces a nonzero label, the function __dfsan_nonzero_label is called. The idea is that a debugger breakpoint can be set on this function in a nominally label-free program to help identify any bugs in the instrumentation pass causing labels to be introduced. Reviewers: eugenis CC: llvm-commits Differential Revision: http://llvm-reviews.chandlerc.com/D1405 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188472 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 50 +++++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b645ab8..7159cc0 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -95,6 +95,12 @@ static cl::opt ClArgsABI( cl::desc("Use the argument ABI rather than the TLS ABI"), cl::Hidden); +static cl::opt ClDebugNonzeroLabels( + "dfsan-debug-nonzero-labels", + cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " + "load or return with a nonzero label"), + cl::Hidden); + namespace { class DataFlowSanitizer : public ModulePass { @@ -160,10 +166,12 @@ class DataFlowSanitizer : public ModulePass { FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanUnimplementedFnTy; FunctionType *DFSanSetLabelFnTy; + FunctionType *DFSanNonzeroLabelFnTy; Constant *DFSanUnionFn; Constant *DFSanUnionLoadFn; Constant *DFSanUnimplementedFn; Constant *DFSanSetLabelFn; + Constant *DFSanNonzeroLabelFn; MDNode *ColdCallWeights; OwningPtr ABIList; DenseMap UnwrappedFnMap; @@ -197,6 +205,7 @@ struct DFSanFunction { DenseMap AllocaShadowMap; std::vector > PHIFixups; DenseSet SkipInsts; + DenseSet NonZeroChecks; DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), @@ -311,6 +320,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) { Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy }; DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), DFSanSetLabelArgs, /*isVarArg=*/false); + DFSanNonzeroLabelFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), ArrayRef(), /*isVarArg=*/false); if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); @@ -389,6 +400,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Function *F = dyn_cast(DFSanSetLabelFn)) { F->addAttribute(1, Attribute::ZExt); } + DFSanNonzeroLabelFn = + Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); std::vector FnsToInstrument; llvm::SmallPtrSet FnsWithNativeABI; @@ -397,7 +410,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { i != DFSanUnionFn && i != DFSanUnionLoadFn && i != 
DFSanUnimplementedFn && - i != DFSanSetLabelFn) + i != DFSanSetLabelFn && + i != DFSanNonzeroLabelFn) FnsToInstrument.push_back(&*i); } @@ -560,6 +574,31 @@ bool DataFlowSanitizer::runOnModule(Module &M) { val, DFSF.getShadow(i->first->getIncomingValue(val))); } } + + // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy + // places (i.e. instructions in basic blocks we haven't even begun visiting + // yet). To make our life easier, do this work in a pass after the main + // instrumentation. + if (ClDebugNonzeroLabels) { + for (DenseSet::iterator i = DFSF.NonZeroChecks.begin(), + e = DFSF.NonZeroChecks.end(); + i != e; ++i) { + Instruction *Pos; + if (Instruction *I = dyn_cast(*i)) + Pos = I->getNextNode(); + else + Pos = DFSF.F->getEntryBlock().begin(); + while (isa(Pos) || isa(Pos)) + Pos = Pos->getNextNode(); + IRBuilder<> IRB(Pos); + Instruction *NeInst = cast( + IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow)); + BranchInst *BI = cast(SplitBlockAndInsertIfThen( + NeInst, /*Unreachable=*/ false, ColdCallWeights)); + IRBuilder<> ThenIRB(BI); + ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn); + } + } } return false; @@ -618,6 +657,7 @@ Value *DFSanFunction::getShadow(Value *V) { break; } } + NonZeroChecks.insert(Shadow); } else { Shadow = DFS.ZeroShadow; } @@ -814,7 +854,11 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { Value *LoadedShadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); - DFSF.setShadow(&LI, DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI)); + Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI); + if (CombinedShadow != DFSF.DFS.ZeroShadow) + DFSF.NonZeroChecks.insert(CombinedShadow); + + DFSF.setShadow(&LI, CombinedShadow); } void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, @@ -1131,6 +1175,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS()); DFSF.SkipInsts.insert(LI); DFSF.setShadow(CS.getInstruction(), LI); + DFSF.NonZeroChecks.insert(LI); } } @@ -1184,6 +1229,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next); DFSF.SkipInsts.insert(ExShadow); DFSF.setShadow(ExVal, ExShadow); + DFSF.NonZeroChecks.insert(ExShadow); CS.getInstruction()->replaceAllUsesWith(ExVal); } -- cgit v1.1 From b187b69170ec50fa8cf9d434ae204b86b8eb4339 Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Thu, 15 Aug 2013 20:58:59 +0000 Subject: Fixing a corner-case bug in strchr and strrchr lib call optimizations where the input character is not converted to char before comparing with zero. The patch was discussed in this thread: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130812/184069.html git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 094c201..93720be 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -477,7 +477,7 @@ struct StrChrOpt : public LibCallOptimization { // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (255 & CharC->getSExtValue()) == 0 ? 
Str.size() : Str.find(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. strchr returns null. return Constant::getNullValue(CI->getType()); @@ -513,7 +513,7 @@ struct StrRChrOpt : public LibCallOptimization { } // Compute the offset. - size_t I = CharC->getSExtValue() == 0 ? + size_t I = (255 & CharC->getSExtValue()) == 0 ? Str.size() : Str.rfind(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. Return null. return Constant::getNullValue(CI->getType()); -- cgit v1.1 From c34540aa861105d515ed73eb2ae614d9453016b6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2013 23:11:03 +0000 Subject: Fix spelling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 22a2519..506346d 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -75,8 +75,8 @@ private: DebugLoc DbgLoc; }; -/// A helper class for numbering instructions in multible blocks. -/// Numbers starts at zero for each basic block. +/// A helper class for numbering instructions in multiple blocks. +/// Numbers start at zero for each basic block. struct BlockNumbering { BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} -- cgit v1.1 From a630cb032cc09aaec92fa4ce26891abfa7fa1348 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 15 Aug 2013 23:11:07 +0000 Subject: Don't do FoldCmpLoadFromIndexedGlobal for non inbounds GEPs This path wasn't tested before without a datalayout, so add some more tests and re-run with and without one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188507 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index c0225ae..4ac4753 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -226,8 +226,8 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && TD == 0) return 0; + if (!GEP->isInBounds()) + return 0; Constant *Init = GV->getInitializer(); if (!isa(Init) && !isa(Init)) @@ -390,13 +390,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // order the state machines in complexity of the generated code. Value *Idx = GEP->getOperand(2); - // If the index is larger than the pointer size of the target, truncate the - // index down like the GEP would do implicitly. We don't have to do this for - // an inbounds GEP because the index can't be out of range. - if (!GEP->isInBounds() && - Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) - Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); - // If the comparison is only true for one or two elements, emit direct // comparisons. 
if (SecondTrueElement != Overdefined) { -- cgit v1.1 From 4658ce9c1f023ca8198add9eabc9ec466d23c8cb Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 16 Aug 2013 00:15:20 +0000 Subject: InstCombine: Simplify if(x!=0 && x!=-1). When both constants are positive or both constants are negative, InstCombine already simplifies comparisons like this, but when it's exactly zero and -1, the operand sorting ends up reversed and the pattern fails to match. Handle that special case. Follow up for rdar://14689217 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b474bd8..bc8506a 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -849,10 +849,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 return RHS; case ICmpInst::ICMP_NE: + // Special case to get the ordering right when the values wrap around + // zero. + if (LHSCst->getValue() == 0 && RHSCst->getValue() == -1) + std::swap(LHSCst, RHSCst); if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1), + Val->getName()+".cmp"); } break; // (X != 13 & X != 15) -> no change } -- cgit v1.1 From bff3c587f68530faa1d2be962255254e9adce264 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 16 Aug 2013 17:03:36 +0000 Subject: InstCombine: Use isAllOnesValue() instead of explicit -1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188563 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index bc8506a..d40385c 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -851,7 +851,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_NE: // Special case to get the ordering right when the values wrap around // zero. - if (LHSCst->getValue() == 0 && RHSCst->getValue() == -1) + if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue()) std::swap(LHSCst, RHSCst); if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); -- cgit v1.1 From 190673610f41be171589be8a0263d8c10a8f3edf Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Sat, 17 Aug 2013 11:04:47 +0000 Subject: PR 16899: Do not modify the basic block using the iterator, but keep the next value. This avoids crashes due to invalidation. Patch by Joey Gouly. 
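[Editorial aside on the r188512 fold above, before the r188605 diff.] After the swap, (X != -1 && X != 0) becomes X - (-1) >u 1, i.e. X + 1 >u 1 with wrapping arithmetic. A brute-force standalone verification over all 8-bit values, not InstCombine code:

    #include <cstdint>
    #include <cstdio>

    int main() {
      for (unsigned V = 0; V < 256; ++V) {
        uint8_t X = static_cast<uint8_t>(V);
        bool Original = (X != 0) && (X != 0xFF);        // X != 0 && X != -1
        bool Folded = static_cast<uint8_t>(X + 1) > 1;  // X - (-1) >u 1
        if (Original != Folded) {
          std::printf("mismatch at %u\n", V);
          return 1;
        }
      }
      std::printf("fold holds for all 8-bit values\n");
      return 0;
    }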
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 506346d..ee9c5f2 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1895,12 +1895,14 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Incoming.size() > 1) Changed |= tryToVectorizeList(Incoming, R); - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - if (isa(it)) + llvm::Instruction *I; + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { + I = it++; + if (isa(I)) continue; // Try to vectorize reductions that use PHINodes. - if (PHINode *P = dyn_cast(it)) { + if (PHINode *P = dyn_cast(I)) { // Check that the PHI is a reduction PHI. if (P->getNumIncomingValues() != 2) return Changed; @@ -1922,7 +1924,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize trees that start at compare instructions. - if (CmpInst *CI = dyn_cast(it)) { + if (CmpInst *CI = dyn_cast(I)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { Changed |= true; continue; -- cgit v1.1 From 353149ea2f8d4d3bf1ec82e90f80154c6959d56e Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 19 Aug 2013 00:24:20 +0000 Subject: Remove SpecialCaseList::findCategory. It turned out that I didn't need this for DFSan. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SpecialCaseList.cpp | 35 -------------------------------- 1 file changed, 35 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index 5400bcd..cf12bbf 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -169,12 +169,6 @@ SpecialCaseList::~SpecialCaseList() { } } -bool SpecialCaseList::findCategory(const Function &F, - StringRef &Category) const { - return findCategory(*F.getParent(), Category) || - findCategory("fun", F.getName(), Category); -} - bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { return isIn(*F.getParent(), Category) || inSectionCategory("fun", F.getName(), Category); @@ -191,13 +185,6 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { return ""; } -bool SpecialCaseList::findCategory(const GlobalVariable &G, - StringRef &Category) const { - return findCategory(*G.getParent(), Category) || - findCategory("global", G.getName(), Category) || - findCategory("type", GetGVTypeString(G), Category); -} - bool SpecialCaseList::isIn(const GlobalVariable &G, const StringRef Category) const { return isIn(*G.getParent(), Category) || @@ -205,32 +192,10 @@ bool SpecialCaseList::isIn(const GlobalVariable &G, inSectionCategory("type", GetGVTypeString(G), Category); } -bool SpecialCaseList::findCategory(const Module &M, StringRef &Category) const { - return findCategory("src", M.getModuleIdentifier(), Category); -} - bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { return inSectionCategory("src", M.getModuleIdentifier(), Category); } -bool SpecialCaseList::findCategory(const StringRef Section, - const StringRef Query, - StringRef &Category) const 
{ - StringMap >::const_iterator I = Entries.find(Section); - if (I == Entries.end()) return false; - - for (StringMap::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - if (II->getValue().match(Query)) { - Category = II->first(); - return true; - } - } - - return false; -} - bool SpecialCaseList::inSectionCategory(const StringRef Section, const StringRef Query, const StringRef Category) const { -- cgit v1.1 From 2063637fa7c9ebc880cf858674eb45727d4ea295 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 19 Aug 2013 06:55:47 +0000 Subject: Adds missing TLI check for library simplification of * pow(x, 0.5) -> fabs(sqrt(x)) * pow(2.0, x) -> exp2(x) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188656 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 93720be..ff0d5d9 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1133,9 +1133,11 @@ struct PowOpt : public UnsafeFPLibCallOptimization { Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast(Op1)) { - if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + // pow(1.0, x) -> 1.0 + if (Op1C->isExactlyValue(1.0)) return Op1C; - if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + // pow(2.0, x) -> exp2(x) + if (Op1C->isExactlyValue(2.0) && TLI->has(LibFunc::exp2)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } @@ -1145,7 +1147,8 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); - if (Op2C->isExactlyValue(0.5)) { + if (Op2C->isExactlyValue(0.5) && + TLI->has(LibFunc::sqrt) && TLI->has(LibFunc::fabs)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. -- cgit v1.1 From 2b762cc75df2cbab21b5a1945fdf913a66de0f81 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 19 Aug 2013 19:00:35 +0000 Subject: Introduce SpecialCaseList::isIn overload for GlobalAliases. Differential Revision: http://llvm-reviews.chandlerc.com/D1437 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188688 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SpecialCaseList.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index cf12bbf..2ef692c 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -174,7 +174,7 @@ bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { inSectionCategory("fun", F.getName(), Category); } -static StringRef GetGVTypeString(const GlobalVariable &G) { +static StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getType()->getElementType(); // For now we support blacklisting struct types only. 
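[Editorial aside on the r188656 pow simplification above; the GlobalAlias diff continues below.] The expansion guarded by those TLI checks is pow(x, 0.5) -> (x == -infinity ? +infinity : fabs(sqrt(x))), and both the fabs and the -infinity test exist for IEEE corner cases. A small standalone demonstration, assuming C99 pow semantics:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double Cases[] = {4.0, -0.0, -INFINITY};
      for (double X : Cases)
        std::printf("x=%g  pow(x,0.5)=%g  sqrt(x)=%g  fabs(sqrt(x))=%g\n",
                    X, std::pow(X, 0.5), std::sqrt(X), std::fabs(std::sqrt(X)));
      // pow(-0.0, 0.5) is +0.0 but sqrt(-0.0) is -0.0, hence the fabs;
      // pow(-inf, 0.5) is +inf but sqrt(-inf) is NaN, hence the explicit test.
      return 0;
    }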
@@ -189,7 +189,19 @@ bool SpecialCaseList::isIn(const GlobalVariable &G, const StringRef Category) const { return isIn(*G.getParent(), Category) || inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGVTypeString(G), Category); + inSectionCategory("type", GetGlobalTypeString(G), Category); +} + +bool SpecialCaseList::isIn(const GlobalAlias &GA, + const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa(GA.getType()->getElementType())) + return inSectionCategory("fun", GA.getName(), Category); + + return inSectionCategory("global", GA.getName(), Category) || + inSectionCategory("type", GetGlobalTypeString(GA), Category); } bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { -- cgit v1.1 From 89062b838789d61460886c5c4c3838690a800de7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2013 21:40:31 +0000 Subject: Revert non-test parts of r188507 Re-add the inboundsless tests I didn't add originally git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188710 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 4ac4753..93466ea 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -226,7 +226,8 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { - if (!GEP->isInBounds()) + // We need TD information to know the pointer size unless this is inbounds. + if (!GEP->isInBounds() && TD == 0) return 0; Constant *Init = GV->getInitializer(); @@ -390,6 +391,13 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // order the state machines in complexity of the generated code. Value *Idx = GEP->getOperand(2); + // If the index is larger than the pointer size of the target, truncate the + // index down like the GEP would do implicitly. We don't have to do this for + // an inbounds GEP because the index can't be out of range. + if (!GEP->isInBounds() && + Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) + Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); + // If the comparison is only true for one or two elements, emit direct // comparisons. 
if (SecondTrueElement != Overdefined) { -- cgit v1.1 From 5c40cc2e1e560ab8ba1b79fc5cf5a302643b2f4c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2013 22:17:18 +0000 Subject: commonPointerCast cleanups to make address space change easier git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d31f845..bad3756 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1370,15 +1370,21 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { return &CI; } + if (!TD) + return commonCastTransforms(CI); + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. - APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); - if (TD && GEP->hasOneUse() && isa(GEP->getOperand(0)) && + unsigned OffsetBits = TD->getPointerSizeInBits(); + APInt Offset(OffsetBits, 0); + BitCastInst *BCI = dyn_cast(GEP->getOperand(0)); + if (GEP->hasOneUse() && + BCI && GEP->accumulateConstantOffset(*TD, Offset)) { // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); + Value *OrigBase = BCI->getOperand(0); Type *GEPIdxTy = OrigBase->getType()->getPointerElementType(); SmallVector NewIndices; if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { @@ -1386,8 +1392,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // and bitcast the result. This eliminates one bitcast, potentially // two. Value *NGEP = cast(GEP)->isInBounds() ? - Builder->CreateInBoundsGEP(OrigBase, NewIndices) : - Builder->CreateGEP(OrigBase, NewIndices); + Builder->CreateInBoundsGEP(OrigBase, NewIndices) : + Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); if (isa(CI)) -- cgit v1.1 From c4ad982f0bbf4c3d8fc7babc832f0a75e615ba19 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2013 22:17:34 +0000 Subject: Cleanup visitGetElementPtr to make address space change easier git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188720 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstructionCombining.cpp | 24 ++++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 248c349..03797a3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1319,15 +1319,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } + if (!TD) + return 0; + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast(PtrOp)) { - APInt Offset(TD ? 
TD->getPointerSizeInBits() : 1, 0); - if (TD && - !isa(BCI->getOperand(0)) && + Value *Operand = BCI->getOperand(0); + PointerType *OpType = cast(Operand->getType()); + unsigned OffsetBits = TD->getPointerSizeInBits(); + APInt Offset(OffsetBits, 0); + if (!isa(Operand) && GEP.accumulateConstantOffset(*TD, Offset) && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -1336,8 +1341,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa(BCI->getOperand(0)) || - isAllocationFn(BCI->getOperand(0), TLI)) { + if (isa(Operand) || isAllocationFn(Operand, TLI)) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1348,18 +1352,18 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return &GEP; } } - return new BitCastInst(BCI->getOperand(0), GEP.getType()); + return new BitCastInst(Operand, GEP.getType()); } // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector NewIndices; - Type *InTy = BCI->getOperand(0)->getType()->getPointerElementType(); + Type *InTy = OpType->getElementType(); if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : - Builder->CreateGEP(BCI->getOperand(0), NewIndices); + Builder->CreateInBoundsGEP(Operand, NewIndices) : + Builder->CreateGEP(Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -1372,8 +1376,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } - - static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo *TLI) { -- cgit v1.1 From 8e3367ea36d7b25c79b9d3f14842e8f9c7c5801e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 19 Aug 2013 22:17:40 +0000 Subject: Teach InstCombine visitGetElementPtr about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188721 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 4 +-- lib/Transforms/InstCombine/InstCombineCasts.cpp | 5 +-- .../InstCombine/InstructionCombining.cpp | 37 ++++++++++++---------- 3 files changed, 26 insertions(+), 20 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index b3084cc..d035c53 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -212,8 +212,8 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, - SmallVectorImpl &NewIndices); + Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index bad3756..76796b9 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ 
-1385,9 +1385,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { GEP->accumulateConstantOffset(*TD, Offset)) { // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = BCI->getOperand(0); - Type *GEPIdxTy = OrigBase->getType()->getPointerElementType(); SmallVector NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { + if (FindElementAtOffset(OrigBase->getType(), + Offset.getSExtValue(), + NewIndices)) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 03797a3..f0a14a3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -755,19 +755,25 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return ReplaceInstUsesWith(I, NewPN); } -/// FindElementAtOffset - Given a type and a constant offset, determine whether -/// or not there is a sequence of GEP indices into the type that will land us at -/// the specified offset. If so, fill them into NewIndices and return the -/// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, - SmallVectorImpl &NewIndices) { - if (!TD) return 0; - if (!Ty->isSized()) return 0; +/// FindElementAtOffset - Given a pointer type and a constant offset, determine +/// whether or not there is a sequence of GEP indices into the pointed type that +/// will land us at the specified offset. If so, fill them into NewIndices and +/// return the resultant element type, otherwise return null. +Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, + SmallVectorImpl &NewIndices) { + assert(PtrTy->isPtrOrPtrVectorTy()); + + if (!TD) + return 0; + + Type *Ty = PtrTy->getPointerElementType(); + if (!Ty->isSized()) + return 0; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - Type *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + Type *IntPtrTy = TD->getIntPtrType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -1235,7 +1241,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(SrcElTy->getArrayElementType()) == TD->getTypeAllocSize(ResElTy)) { - Type *IdxType = TD->getIntPtrType(GEP.getContext()); + Type *IdxType = TD->getIntPtrType(GEP.getType()); Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) : @@ -1260,7 +1266,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. - assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1295,7 +1301,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Earlier transforms ensure that the index has type IntPtrType, which // considerably simplifies the logic by eliminating implicit casts. 
- assert(Idx->getType() == TD->getIntPtrType(GEP.getContext()) && + assert(Idx->getType() == TD->getIntPtrType(GEP.getType()) && "Index not cast to pointer width?"); bool NSW; @@ -1304,7 +1310,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the multiplication NewIdx * Scale may overflow then the new // GEP may not be "inbounds". Value *Off[2] = { - Constant::getNullValue(TD->getIntPtrType(GEP.getContext())), + Constant::getNullValue(TD->getIntPtrType(GEP.getType())), NewIdx }; @@ -1330,7 +1336,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { Value *Operand = BCI->getOperand(0); PointerType *OpType = cast<PointerType>(Operand->getType()); - unsigned OffsetBits = TD->getPointerSizeInBits(); + unsigned OffsetBits = TD->getPointerTypeSizeInBits(OpType); APInt Offset(OffsetBits, 0); if (!isa<AllocaInst>(Operand) && GEP.accumulateConstantOffset(*TD, Offset) && @@ -1359,8 +1365,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. SmallVector<Value*, 8> NewIndices; - Type *InTy = OpType->getElementType(); - if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) { + if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(Operand, NewIndices) : Builder->CreateGEP(Operand, NewIndices); -- cgit v1.1 From c2d722efbfd4860dcb7a344be2031ec24cb6691f Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Mon, 19 Aug 2013 22:47:55 +0000 Subject: Use pop_back_val() instead of both back() and pop_back(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188723 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineWorklist.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 19959c0..1109558 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -74,8 +74,7 @@ public: } Instruction *RemoveOne() { - Instruction *I = Worklist.back(); - Worklist.pop_back(); + Instruction *I = Worklist.pop_back_val(); WorklistMap.erase(I); return I; } -- cgit v1.1 From 66d1fa6f4b443ac9f8bcea5d1f71a73ada733a42 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Mon, 19 Aug 2013 23:35:46 +0000 Subject: Add a llvm.copysign intrinsic This adds a llvm.copysign intrinsic; we already have Libfunc recognition for copysign (which is turned into the FCOPYSIGN SDAG node). In order to autovectorize calls to copysign in the loop vectorizer, we need a corresponding intrinsic as well. In addition to the expected changes to the language reference, the loop vectorizer, BasicTTI, and the SDAG builder (the intrinsic is transformed into an FCOPYSIGN node, just like the function call), this also adds FCOPYSIGN to a few lists in LegalizeVector{Ops,Types} so that vector copysigns can be expanded. In TargetLoweringBase::initActions, I've made the default action for FCOPYSIGN be Expand for vector types. This seems correct for all in-tree targets, and I think is the right thing to do because, previously, there was no way to generate vector-valued FCOPYSIGN nodes (and most targets don't specify an action for vector-typed FCOPYSIGN).
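[Editor's note: the hunk below only teaches the loop vectorizer to recognize the new intrinsic; the intrinsic definition itself lands outside lib/Transforms, so it does not appear in this archive. As a hedged illustration (not part of the patch, function and variable names invented), a loop like the following C++ one is the kind of code this series enables the vectorizer to handle, by matching the libm call through LibFunc::copysignf and emitting @llvm.copysign on vector operands:]

#include <math.h>

// Illustrative only: with this series applied, the loop vectorizer can
// recognize each call below, rewrite it to the @llvm.copysign intrinsic,
// and let the SelectionDAG builder lower it to an FCOPYSIGN node.
void apply_sign(float *dst, const float *mag, const float *sgn, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] = copysignf(mag[i], sgn[i]);
}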
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188728 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e452acd..a7026f6 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1767,6 +1767,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case Intrinsic::log10: case Intrinsic::log2: case Intrinsic::fabs: + case Intrinsic::copysign: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: @@ -1831,6 +1832,10 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case LibFunc::fabsf: case LibFunc::fabsl: return Intrinsic::fabs; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + return Intrinsic::copysign; case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: -- cgit v1.1 From 16a2253e4011d27a9426f81f55501fd5dfb863bd Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 20 Aug 2013 21:21:45 +0000 Subject: SLPVectorizer: Fix invalid iterator errors Update iterator when the SLP vectorizer changes the instructions in the basic block by restarting the traversal of the basic block. Patch by Yi Jiang! Fixes PR 16899. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188832 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 64 ++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 13 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ee9c5f2..3c24af8 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1875,6 +1875,8 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector<Value *, 16> Incoming; + SmallSet<Instruction *, 16> VisitedInstrs; + // Collect the incoming values from the PHIs. for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; ++instr) { @@ -1883,9 +1885,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!P) break; + // We may go through BB multiple times so skip the one we have checked. + if (VisitedInstrs.count(instr)) + continue; + VisitedInstrs.insert(instr); + // Stop constructing the list when you reach a different type. if (Incoming.size() && P->getType() != Incoming[0]->getType()) { - Changed |= tryToVectorizeList(Incoming, R); + if (tryToVectorizeList(Incoming, R)) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + Changed = true; + instr = BB->begin(); + ie = BB->end(); + } + Incoming.clear(); } @@ -1895,14 +1909,20 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Incoming.size() > 1) Changed |= tryToVectorizeList(Incoming, R); - llvm::Instruction *I; - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { - I = it++; - if (isa<DbgInfoIntrinsic>(I)) + VisitedInstrs.clear(); + + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { + + // We may go through BB multiple times so skip the one we have checked. + if (VisitedInstrs.count(it)) + continue; + VisitedInstrs.insert(it); + + if (isa<DbgInfoIntrinsic>(it)) continue; // Try to vectorize reductions that use PHINodes. - if (PHINode *P = dyn_cast<PHINode>(I)) { + if (PHINode *P = dyn_cast<PHINode>(it)) { // Check that the PHI is a reduction PHI. if (P->getNumIncomingValues() != 2) return Changed; @@ -1919,20 +1939,38 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (Inst == P) Inst = BI->getOperand(1); - Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R); + if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + Changed = true; + it = BB->begin(); + e = BB->end(); + } continue; } // Try to vectorize trees that start at compare instructions. - if (CmpInst *CI = dyn_cast<CmpInst>(I)) { + if (CmpInst *CI = dyn_cast<CmpInst>(it)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { - Changed |= true; + Changed = true; + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + it = BB->begin(); + e = BB->end(); continue; } - for (int i = 0; i < 2; ++i) - if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) - Changed |= - tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R); + + for (int i = 0; i < 2; ++i) { + if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) { + if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) { + Changed = true; + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + it = BB->begin(); + e = BB->end(); + } + } + } continue; } } -- cgit v1.1 From f675b3c6444af3e2f547b050eedb8a12584110ef Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 20 Aug 2013 23:52:00 +0000 Subject: Move registering the execution of a basic block to the beginning rather than the end. There are situations which can affect the correctness (or at least expectation) of the gcov output. For instance, if a call to __gcov_flush() occurs within a block before the execution count is registered and then the program aborts in some way, then that block will not be marked as executed. This is not normally what the user expects. If we move the code that's registering when a block is executed to the beginning, we can catch these types of situations. PR16893 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188849 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 4c2681f..61c53b4 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -519,15 +519,15 @@ bool GCOVProfiler::emitProfileArcs() { TerminatorInst *TI = BB->getTerminator(); int Successors = isa<ReturnInst>(TI) ?
1 : TI->getNumSuccessors(); if (Successors) { - IRBuilder<> Builder(TI); - if (Successors == 1) { + IRBuilder<> Builder(BB->getFirstInsertionPt()); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else if (BranchInst *BI = dyn_cast(TI)) { + IRBuilder<> Builder(BI); Value *Sel = Builder.CreateSelect(BI->getCondition(), Builder.getInt64(Edge), Builder.getInt64(Edge + 1)); @@ -543,6 +543,7 @@ bool GCOVProfiler::emitProfileArcs() { for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } + Edge += Successors; } } @@ -554,14 +555,13 @@ bool GCOVProfiler::emitProfileArcs() { GlobalVariable *EdgeState = getEdgeStateValue(); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt()); Builder.CreateStore(Builder.getInt32(i), EdgeState); } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { - // call runtime to perform increment - BasicBlock::iterator InsertPt = - ComplexEdgeSuccs[i+1]->getFirstInsertionPt(); - IRBuilder<> Builder(InsertPt); + // Call runtime to perform increment. + IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt()); Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); -- cgit v1.1 From 5d7a73f866c6729b9cb7a1cca9711b68d125a981 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 Aug 2013 18:54:47 +0000 Subject: Fix typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188915 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3c24af8..73e8056 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -308,7 +308,7 @@ private: /// \returns the index of the last instrucion in the BB from \p VL. int getLastIndex(ArrayRef VL); - /// \returns the Instrucion in the bundle \p VL. + /// \returns the Instruction in the bundle \p VL. Instruction *getLastInstruction(ArrayRef VL); /// \returns a vector from a collection of scalars in \p VL. -- cgit v1.1 From 551dac1f62026ef32ad294d8c1cc5b545b05935a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 Aug 2013 18:54:50 +0000 Subject: Use attribute helper function git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188916 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 73e8056..c9b8e7b 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1592,8 +1592,7 @@ struct SLPVectorizer : public FunctionPass { return false; // Don't vectorize when the attribute NoImplicitFloat is used. 
- if (F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoImplicitFloat)) + if (F.hasFnAttribute(Attribute::NoImplicitFloat)) return false; DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); -- cgit v1.1 From 52c7d8e4ebe3be0890880026e174fd2fe6544220 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 21 Aug 2013 19:53:10 +0000 Subject: Teach InstCombine about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 45 ++++++++++++++-------- lib/Transforms/InstCombine/InstCombineCompares.cpp | 9 +++-- 2 files changed, 33 insertions(+), 21 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 76796b9..a35631f 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1338,14 +1338,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != - TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. - Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); - return new IntToPtrInst(P, CI.getType()); + + if (TD) { + unsigned AS = CI.getAddressSpace(); + if (CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits(AS)) { + Type *Ty = TD->getIntPtrType(CI.getContext(), AS); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); + } } if (Instruction *I = commonCastTransforms(CI)) @@ -1377,7 +1381,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. - unsigned OffsetBits = TD->getPointerSizeInBits(); + unsigned AS = GEP->getPointerAddressSpace(); + unsigned OffsetBits = TD->getPointerSizeInBits(AS); APInt Offset(OffsetBits, 0); BitCastInst *BCI = dyn_cast(GEP->getOperand(0)); if (GEP->hasOneUse() && @@ -1412,16 +1417,22 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { - Type *Ty = TD->getIntPtrType(CI.getContext()); - if (CI.getType()->isVectorTy()) // Handle vectors of pointers. 
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); - return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); - } + if (!TD) + return commonPointerCastTransforms(CI); + + Type *Ty = CI.getType(); + unsigned AS = CI.getPointerAddressSpace(); + + if (Ty->getScalarSizeInBits() == TD->getPointerSizeInBits(AS)) + return commonPointerCastTransforms(CI); + + Type *PtrTy = TD->getIntPtrType(CI.getContext(), AS); + if (Ty->isVectorTy()) // Handle vectors of pointers. + PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); - return commonPointerCastTransforms(CI); + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } /// OptimizeVectorResize - This input value (which is known to have vector type) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 93466ea..18a0872 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -563,16 +563,18 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { } } + + // Okay, we know we have a single variable index, which must be a // pointer/array/vector index. If there is no offset, life is simple, return // the index. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + Type *IntPtrTy = TD.getIntPtrType(GEP->getOperand(0)->getType()); + unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth(); if (Offset == 0) { // Cast to intptrty in case a truncation occurs. If an extension is needed, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; @@ -594,7 +596,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); @@ -2478,7 +2479,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case Instruction::IntToPtr: // icmp pred inttoptr(X), null -> icmp pred X, 0 if (RHSC->isNullValue() && TD && - TD->getIntPtrType(RHSC->getContext()) == + TD->getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType()) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), Constant::getNullValue(LHSI->getOperand(0)->getType())); -- cgit v1.1 From dfdf7f44a9a3c3f27d3d836b0f25daf08db5e418 Mon Sep 17 00:00:00 2001 From: Yunzhong Gao Date: Wed, 21 Aug 2013 22:11:15 +0000 Subject: No functionality change. Replace "(255 & value)" with "(0xFF & value)" to improve clarity. 
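[Editor's note: a minimal standalone illustration of the byte mask this commit spells more clearly, not part of the patch. strchr() converts its int argument to char before searching, so only the low byte of the constant decides whether the simplifier in the diff below is really looking for the terminating NUL:]

#include <assert.h>
#include <stdint.h>

int main() {
  // strchr(s, 256) searches for (char)256, i.e. '\0', so the optimizer
  // must test only the low byte before treating the call like strlen.
  int64_t CharVal = 256; // stands in for ConstantInt::getSExtValue()
  assert((0xFF & CharVal) == 0); // NUL search: offset becomes Str.size()
  return 0;
}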
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188941 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index ff0d5d9..83636fb 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -477,7 +477,7 @@ struct StrChrOpt : public LibCallOptimization { // Compute the offset, make sure to handle the case when we're searching for // zero (a weird way to spell strlen). - size_t I = (255 & CharC->getSExtValue()) == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.find(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. strchr returns null. return Constant::getNullValue(CI->getType()); @@ -513,7 +513,7 @@ struct StrRChrOpt : public LibCallOptimization { } // Compute the offset. - size_t I = (255 & CharC->getSExtValue()) == 0 ? + size_t I = (0xFF & CharC->getSExtValue()) == 0 ? Str.size() : Str.rfind(CharC->getSExtValue()); if (I == StringRef::npos) // Didn't find the char. Return null. return Constant::getNullValue(CI->getType()); -- cgit v1.1 From 4920bf77be434681a4f782c80ca0ab982abfb726 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 21 Aug 2013 22:53:29 +0000 Subject: Removed trailing whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/StripSymbols.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 2791106..14bbdc4 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -9,7 +9,7 @@ // // The StripSymbols transformation implements code stripping. Specifically, it // can delete: -// +// // * names for virtual registers // * symbols for internal globals and functions // * debug information @@ -39,7 +39,7 @@ namespace { bool OnlyDebugInfo; public: static char ID; // Pass identification, replacement for typeid - explicit StripSymbols(bool ODI = false) + explicit StripSymbols(bool ODI = false) : ModulePass(ID), OnlyDebugInfo(ODI) { initializeStripSymbolsPass(*PassRegistry::getPassRegistry()); } @@ -144,7 +144,7 @@ static void RemoveDeadConstant(Constant *C) { assert(C->use_empty() && "Constant is not dead!"); SmallPtrSet Operands; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - if (OnlyUsedBy(C->getOperand(i), C)) + if (OnlyUsedBy(C->getOperand(i), C)) Operands.insert(cast(C->getOperand(i))); if (GlobalVariable *GV = dyn_cast(C)) { if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. 
@@ -182,7 +182,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) { for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; if (STy->isLiteral() || STy->getName().empty()) continue; - + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) continue; @@ -199,7 +199,7 @@ static void findUsedValues(GlobalVariable *LLVMUsed, ConstantArray *Inits = cast(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) - if (GlobalValue *GV = + if (GlobalValue *GV = dyn_cast(Inits->getOperand(i)->stripPointerCasts())) UsedValues.insert(GV); } @@ -217,22 +217,22 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage } - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); } - + // Remove all names from types. StripTypeNames(M, PreserveDbgInfo); return true; } -// StripDebugInfo - Strip debug info in the module if it exists. -// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and +// StripDebugInfo - Strip debug info in the module if it exists. +// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and // llvm.dbg.region.end calls, and any globals they point to if now dead. static bool StripDebugInfo(Module &M) { @@ -307,13 +307,13 @@ bool StripDebugDeclare::runOnModule(Module &M) { assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); if (Arg1->use_empty()) { - if (Constant *C = dyn_cast(Arg1)) + if (Constant *C = dyn_cast(Arg1)) DeadConstants.push_back(C); - else + else RecursivelyDeleteTriviallyDeadInstructions(Arg1); } if (Arg2->use_empty()) - if (Constant *C = dyn_cast(Arg2)) + if (Constant *C = dyn_cast(Arg2)) DeadConstants.push_back(C); } Declare->eraseFromParent(); @@ -337,7 +337,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // Debugging infomration is encoded in llvm IR using metadata. This is designed // such a way that debug info for symbols preserved even if symbols are - // optimized away by the optimizer. This special pass removes debug info for + // optimized away by the optimizer. This special pass removes debug info for // such symbols. // llvm.dbg.gv keeps track of debug info for global variables. -- cgit v1.1 From 55d4c38074145bf9f594142b6b4cdca60699f4d1 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Wed, 21 Aug 2013 22:53:54 +0000 Subject: Fixed typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188957 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/StripSymbols.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 14bbdc4..3d601b6 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -335,7 +335,7 @@ bool StripDebugDeclare::runOnModule(Module &M) { bool StripDeadDebugInfo::runOnModule(Module &M) { bool Changed = false; - // Debugging infomration is encoded in llvm IR using metadata. This is designed + // Debugging information is encoded in llvm IR using metadata. 
This is designed // such a way that debug info for symbols preserved even if symbols are // optimized away by the optimizer. This special pass removes debug info for // such symbols. -- cgit v1.1 From 978de6b56a4eb4d3dbc1b65f2b095a192b240f90 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 22 Aug 2013 02:42:55 +0000 Subject: Teach LoopVectorize about address space sizes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188980 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a7026f6..380c309 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2667,7 +2667,8 @@ bool LoopVectorizationLegality::canVectorize() { static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) - return DL.getIntPtrType(Ty->getContext()); + return DL.getIntPtrType(Ty); + return Ty; } -- cgit v1.1 From 474be0d0f83eb6543bd4091946b40bb4967a3c11 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Thu, 22 Aug 2013 12:45:17 +0000 Subject: Teach the SLP vectorizer the correct way to check for consecutive access using GEPs. Previously, it used a number of different heuristics for analyzing the GEPs. Several of these were conservatively correct, but failed to fall back to SCEV even when SCEV might have given a reasonable answer. One was simply incorrect in how it was formulated. There was good code already to recursively evaluate the constant offsets in GEPs, look through pointer casts, etc. In a previous commit I gathered this into a form that code like the SLP vectorizer can use, which allows all of this code to become quite simple. There is some performance (compile time) concern here at first glance, as we're directly attempting to walk both pointers' constant GEP chains. However, a couple of thoughts: 1) In the very common case where there is a dynamic pointer and a second pointer at a constant offset (usually a stride) from it, this code will not do any unnecessary work. 2) InstCombine and other passes work very hard to collapse constant GEPs, so it will be rare that we iterate here for a long time. That said, if there remain performance problems here, there are some obvious things that can improve the situation immensely. Doing a vectorizer-pass-wide memoizer for each individual layer of pointer values, their base values, and the constant offset is likely to be able to completely remove redundant work and strictly limit the scaling of the work to scrape these GEPs. Since this optimization was not done in the prior version (which would still benefit from it), I've not done it here. But if folks have benchmarks that slow down, it should be straightforward for them to add. I've added a test case, but I'm not really confident of the amount of testing done for different access patterns, strides, and pointer manipulation.
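[Editor's note: the pass-wide memoizer suggested in the message above is explicitly not implemented in this patch. A rough C++ sketch of one possible shape follows; the struct and all of its names are hypothetical, and it assumes only the Value::stripAndAccumulateInBoundsConstantOffsets() helper that the diff below relies on.]

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical pass-wide cache mapping a pointer to its stripped base and
// accumulated constant offset, so repeated isConsecutiveAccess() queries
// never re-walk a GEP chain that several pointers share.
struct PointerOffsetCache {
  typedef std::pair<Value *, APInt> BaseAndOffset;
  DenseMap<Value *, BaseAndOffset> Cache;

  BaseAndOffset getBaseAndOffset(Value *Ptr, const DataLayout &DL,
                                 unsigned PtrBitWidth) {
    DenseMap<Value *, BaseAndOffset>::iterator It = Cache.find(Ptr);
    if (It != Cache.end())
      return It->second; // hit: skip the strip-and-accumulate walk entirely
    APInt Offset(PtrBitWidth, 0);
    Value *Base = Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
    return Cache[Ptr] = BaseAndOffset(Base, Offset);
  }
};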
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189007 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 64 +++++++----------------- 1 file changed, 15 insertions(+), 49 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c9b8e7b..b1f097e 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -992,63 +992,29 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { if (PtrA == PtrB || PtrA->getType() != PtrB->getType()) return false; - // Calculate a constant offset from the base pointer without using SCEV - // in the supported cases. - // TODO: Add support for the case where one of the pointers is a GEP that - // uses the other pointer. - GetElementPtrInst *GepA = dyn_cast<GetElementPtrInst>(PtrA); - GetElementPtrInst *GepB = dyn_cast<GetElementPtrInst>(PtrB); - - unsigned BW = DL->getPointerSizeInBits(ASA); + unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA); Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); - int64_t Sz = DL->getTypeStoreSize(Ty); + APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty)); - // Check if PtrA is the base and PtrB is a constant offset. - if (GepB && GepB->getPointerOperand() == PtrA) { - APInt Offset(BW, 0); - if (GepB->accumulateConstantOffset(*DL, Offset)) - return Offset.getSExtValue() == Sz; - return false; - } + APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA); + PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB); - // Check if PtrB is the base and PtrA is a constant offset. - if (GepA && GepA->getPointerOperand() == PtrB) { - APInt Offset(BW, 0); - if (GepA->accumulateConstantOffset(*DL, Offset)) - return Offset.getSExtValue() == -Sz; - return false; - } + APInt OffsetDelta = OffsetB - OffsetA; - // If both pointers are GEPs: - if (GepA && GepB) { - // Check that they have the same base pointer and number of indices. - if (GepA->getPointerOperand() != GepB->getPointerOperand() || - GepA->getNumIndices() != GepB->getNumIndices()) - return false; + // Check if they are based on the same pointer. That makes the offsets + // sufficient. + if (PtrA == PtrB) + return OffsetDelta == Size; - // Try to strip the geps. This makes SCEV faster. - // Make sure that all of the indices except for the last are identical. - int LastIdx = GepA->getNumIndices(); - for (int i = 0; i < LastIdx - 1; i++) { - if (GepA->getOperand(i+1) != GepB->getOperand(i+1)) - return false; - } - - PtrA = GepA->getOperand(LastIdx); - PtrB = GepB->getOperand(LastIdx); - Sz = 1; - } - - ConstantInt *CA = dyn_cast<ConstantInt>(PtrA); - ConstantInt *CB = dyn_cast<ConstantInt>(PtrB); - if (CA && CB) { - return (CA->getSExtValue() + Sz == CB->getSExtValue()); - } + // Compute the necessary base pointer delta to have the necessary final delta + // equal to the size. + APInt BaseDelta = Size - OffsetDelta; - // Calculate the distance. + // Otherwise compute the distance with SCEV between the base pointers.
const SCEV *PtrSCEVA = SE->getSCEV(PtrA); const SCEV *PtrSCEVB = SE->getSCEV(PtrB); - const SCEV *C = SE->getConstant(PtrSCEVA->getType(), Sz); + const SCEV *C = SE->getConstant(BaseDelta); const SCEV *X = SE->getAddExpr(PtrSCEVA, C); return X == PtrSCEVB; } -- cgit v1.1 From f1366c552480f7c6b2b46b03e19bb798b3a47c66 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 22 Aug 2013 20:08:08 +0000 Subject: DataFlowSanitizer: Prefix the name of each instrumented function with "dfs$". DFSan changes the ABI of each function in the module. This makes it possible for a function with the native ABI to be called with the instrumented ABI, or vice versa, thus possibly invoking undefined behavior. A simple way of statically detecting instances of this problem is to prepend the prefix "dfs$" to the name of each instrumented-ABI function. This will not catch every such problem; in particular function pointers passed across the instrumented-native barrier cannot be used on the other side. These problems could potentially be caught dynamically. Differential Revision: http://llvm-reviews.chandlerc.com/D1373 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189052 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 58 +++++++++++++++++++--- 1 file changed, 51 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 7159cc0..9a46911 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -179,11 +179,13 @@ class DataFlowSanitizer : public ModulePass { Value *getShadowAddress(Value *Addr, Instruction *Pos); Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); - bool isInstrumented(Function *F); + bool isInstrumented(const Function *F); + bool isInstrumented(const GlobalAlias *GA); FunctionType *getArgsFunctionType(FunctionType *T); FunctionType *getCustomFunctionType(FunctionType *T); InstrumentedABI getInstrumentedABI(); WrapperKind getWrapperKind(Function *F); + void addGlobalNamePrefix(GlobalValue *GV); public: DataFlowSanitizer(StringRef ABIListFile = StringRef(), @@ -343,10 +345,14 @@ bool DataFlowSanitizer::doInitialization(Module &M) { return true; } -bool DataFlowSanitizer::isInstrumented(Function *F) { +bool DataFlowSanitizer::isInstrumented(const Function *F) { return !ABIList->isIn(*F, "uninstrumented"); } +bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { + return !ABIList->isIn(*GA, "uninstrumented"); +} + DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { return ClArgsABI ? IA_Args : IA_TLS; } @@ -362,6 +368,25 @@ DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { return WK_Warning; } +void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { + std::string GVName = GV->getName(), Prefix = "dfs$"; + GV->setName(Prefix + GVName); + + // Try to change the name of the function in module inline asm. We only do + // this for specific asm directives, currently only ".symver", to try to avoid + // corrupting asm which happens to contain the symbol name as a substring. + // Note that the substitution for .symver assumes that the versioned symbol + // also has an instrumented name. 
+ std::string Asm = GV->getParent()->getModuleInlineAsm(); + std::string SearchStr = ".symver " + GVName + ","; + size_t Pos = Asm.find(SearchStr); + if (Pos != std::string::npos) { + Asm.replace(Pos, SearchStr.size(), + ".symver " + Prefix + GVName + "," + Prefix); + GV->getParent()->setModuleInlineAsm(Asm); + } +} + bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; @@ -415,6 +440,21 @@ bool DataFlowSanitizer::runOnModule(Module &M) { FnsToInstrument.push_back(&*i); } + // Give function aliases prefixes when necessary. + for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { + GlobalAlias *GA = &*i; + ++i; + // Don't stop on weak. We assume people aren't playing games with the + // instrumentedness of overridden weak aliases. + if (Function *F = dyn_cast( + GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) { + bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); + if (GAInst && FInst) { + addGlobalNamePrefix(GA); + } + } + } + AttrBuilder B; B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B); @@ -427,12 +467,13 @@ bool DataFlowSanitizer::runOnModule(Module &M) { Function &F = **i; FunctionType *FT = F.getFunctionType(); - if (FT->getNumParams() == 0 && !FT->isVarArg() && - FT->getReturnType()->isVoidTy()) - continue; + bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && + FT->getReturnType()->isVoidTy()); if (isInstrumented(&F)) { - if (getInstrumentedABI() == IA_Args) { + // Instrumented functions get a 'dfs$' prefix. This allows us to more + // easily identify cases of mismatching ABIs. + if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { FunctionType *NewFT = getArgsFunctionType(FT); Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); NewF->copyAttributesFrom(&F); @@ -463,13 +504,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->takeName(&F); F.eraseFromParent(); *i = NewF; + addGlobalNamePrefix(NewF); + } else { + addGlobalNamePrefix(&F); } // Hopefully, nobody will try to indirectly call a vararg // function... yet. } else if (FT->isVarArg()) { UnwrappedFnMap[&F] = &F; *i = 0; - } else { + } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { // Build a wrapper function for F. The wrapper simply calls F, and is // added to FnsToInstrument so that any instrumentation according to its // WrapperKind is done in the second pass below. -- cgit v1.1 From 4f68e9ea8e6a9d98b60bbdde719dcb9d68991980 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 22 Aug 2013 20:08:11 +0000 Subject: DataFlowSanitizer: Factor the wrapper builder out to buildWrapperFunction. 
Differential Revision: http://llvm-reviews.chandlerc.com/D1441 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189053 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 53 ++++++++++++++-------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 9a46911..e92d88d 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -186,6 +186,9 @@ class DataFlowSanitizer : public ModulePass { InstrumentedABI getInstrumentedABI(); WrapperKind getWrapperKind(Function *F); void addGlobalNamePrefix(GlobalValue *GV); + Function *buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT); public: DataFlowSanitizer(StringRef ABIListFile = StringRef(), @@ -387,6 +390,33 @@ void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { } } +Function * +DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT) { + FunctionType *FT = F->getFunctionType(); + Function *NewF = Function::Create(NewFT, NewFLink, NewFName, + F->getParent()); + NewF->copyAttributesFrom(F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex)); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); + std::vector Args; + unsigned n = FT->getNumParams(); + for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) + Args.push_back(&*ai); + CallInst *CI = CallInst::Create(F, Args, "", BB); + if (FT->getReturnType()->isVoidTy()) + ReturnInst::Create(*Ctx, BB); + else + ReturnInst::Create(*Ctx, CI, BB); + + return NewF; +} + bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; @@ -521,27 +551,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ? getArgsFunctionType(FT) : FT; Function *NewF = - Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage, - std::string("dfsw$") + F.getName(), &M); - NewF->copyAttributesFrom(&F); - NewF->removeAttributes( - AttributeSet::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType(), - AttributeSet::ReturnIndex)); + buildWrapperFunction(&F, std::string("dfsw$") + std::string(F.getName()), + GlobalValue::LinkOnceODRLinkage, NewFT); if (getInstrumentedABI() == IA_TLS) - NewF->removeAttributes(AttributeSet::FunctionIndex, - ReadOnlyNoneAttrs); - - BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); - std::vector Args; - unsigned n = FT->getNumParams(); - for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) - Args.push_back(&*ai); - CallInst *CI = CallInst::Create(&F, Args, "", BB); - if (FT->getReturnType()->isVoidTy()) - ReturnInst::Create(*Ctx, BB); - else - ReturnInst::Create(*Ctx, CI, BB); + NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs); Value *WrappedFnCst = ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); -- cgit v1.1 From 054cec05b84e878a68e7ecc71342312e76850649 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 22 Aug 2013 20:08:15 +0000 Subject: DataFlowSanitizer: Replace non-instrumented aliases of instrumented functions, and vice versa, with wrappers. 
Differential Revision: http://llvm-reviews.chandlerc.com/D1442 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189054 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index e92d88d..1bf6b99 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -470,7 +470,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { FnsToInstrument.push_back(&*i); } - // Give function aliases prefixes when necessary. + // Give function aliases prefixes when necessary, and build wrappers where the + // instrumentedness is inconsistent. for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { GlobalAlias *GA = &*i; ++i; @@ -481,6 +482,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); if (GAInst && FInst) { addGlobalNamePrefix(GA); + } else if (GAInst != FInst) { + // Non-instrumented alias of an instrumented function, or vice versa. + // Replace the alias with a native-ABI wrapper of the aliasee. The pass + // below will take care of instrumenting it. + Function *NewF = + buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); + GA->replaceAllUsesWith(NewF); + NewF->takeName(GA); + GA->eraseFromParent(); + FnsToInstrument.push_back(NewF); } } } -- cgit v1.1 From e0e66b9dfaca5f9cae3bba563b05a6230d64e21d Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Fri, 23 Aug 2013 00:23:24 +0000 Subject: Update StripDeadDebugInfo to use DebugInfoFinder so that it is no longer stale to the point of not working and more resilient to debug info changes. The current version of StripDeadDebugInfo became stale and no longer actually worked since it was expecting an older version of debug info. This patch updates it to use DebugInfoFinder and the modern DebugInfo classes as much as possible to make it more resilient to such changes. Additionally, the only place where that was avoided (the code where we replace the old sets with the new), I call verify on the DICompileUnit, implying that if the format changes and my live-set changes no longer make sense, an assert will be hit. In order to ensure that that occurs I have included a test case. The actual stripping of the dead debug info follows the same strategy as was used before in this class: find the live set and replace the old set in the given compile unit (which may contain dead global variables/functions) with the new live one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189078 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/StripSymbols.cpp | 155 +++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 62 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 3d601b6..fe9667e 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -332,76 +332,107 @@ bool StripDebugDeclare::runOnModule(Module &M) { return true; } +/// Remove any debug info for global variables/functions in the given module for +/// which said global variable/function no longer exists (i.e. is null). +/// +/// Debugging information is encoded in llvm IR using metadata.
This is designed +/// such a way that debug info for symbols preserved even if symbols are +/// optimized away by the optimizer. This special pass removes debug info for +/// such symbols. bool StripDeadDebugInfo::runOnModule(Module &M) { bool Changed = false; - // Debugging information is encoded in llvm IR using metadata. This is designed - // such a way that debug info for symbols preserved even if symbols are - // optimized away by the optimizer. This special pass removes debug info for - // such symbols. - - // llvm.dbg.gv keeps track of debug info for global variables. - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { - SmallVector MDs; - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - if (NMD->getOperand(i)) { - assert(DIGlobalVariable(NMD->getOperand(i)).isGlobalVariable() && - "A MDNode in llvm.dbg.gv should be a DIGlobalVariable."); - MDs.push_back(NMD->getOperand(i)); - } - else - Changed = true; - NMD->eraseFromParent(); - NMD = NULL; - - for (SmallVectorImpl::iterator I = MDs.begin(), - E = MDs.end(); I != E; ++I) { - GlobalVariable *GV = DIGlobalVariable(*I).getGlobal(); - if (GV && M.getGlobalVariable(GV->getName(), true)) { - if (!NMD) - NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(*I); - } + LLVMContext &C = M.getContext(); + + // Find all debug info in F. This is actually overkill in terms of what we + // want to do, but we want to try and be as redundent as possible in the face + // of potential debug info changes by using the formal interfaces given to us + // as much as possible. + DebugInfoFinder F; + F.processModule(M); + + // For each compile unit, find the live set of global variables/functions and + // replace the current list of potentially dead global variables/functions + // with the live list. + SmallVector LiveGlobalVariables; + SmallVector LiveSubprograms; + DenseSet VisitedSet; + + for (DebugInfoFinder::iterator CI = F.compile_unit_begin(), + CE = F.compile_unit_end(); CI != CE; ++CI) { + // Create our compile unit. + DICompileUnit DIC(*CI); + assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); + + // Create our live subprogram list. + DIArray SPs = DIC.getSubprograms(); + bool SubprogramChange = false; + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram DISP(SPs.getElement(i)); + assert(DISP.Verify() && "DISP must verify as a DISubprogram."); + + // Make sure we visit each subprogram only once. + if (!VisitedSet.insert(DISP).second) + continue; + + // If the function referenced by DISP is not null, the function is live. + if (DISP.getFunction()) + LiveSubprograms.push_back(DISP); else - Changed = true; + SubprogramChange = true; } - } - // llvm.dbg.sp keeps track of debug info for subprograms. - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) { - SmallVector MDs; - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - if (NMD->getOperand(i)) { - assert(DISubprogram(NMD->getOperand(i)).isSubprogram() && - "A MDNode in llvm.dbg.sp should be a DISubprogram."); - MDs.push_back(NMD->getOperand(i)); - } + // Create our live global variable list. + DIArray GVs = DIC.getGlobalVariables(); + bool GlobalVariableChange = false; + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(GVs.getElement(i)); + assert(DIG.Verify() && "DIG must verify as DIGlobalVariable."); + + // Make sure we only visit each global variable only once. 
+ if (!VisitedSet.insert(DIG).second) + continue; + + // If the global variable referenced by DIG is not null, the global + // variable is live. + if (DIG.getGlobal()) + LiveGlobalVariables.push_back(DIG); else - Changed = true; - NMD->eraseFromParent(); - NMD = NULL; - - for (SmallVectorImpl::iterator I = MDs.begin(), - E = MDs.end(); I != E; ++I) { - bool FnIsLive = false; - if (Function *F = DISubprogram(*I).getFunction()) - if (M.getFunction(F->getName())) - FnIsLive = true; - if (FnIsLive) { - if (!NMD) - NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(*I); - } else { - // Remove llvm.dbg.lv.fnname named mdnode which may have been used - // to hold debug info for dead function's local variables. - StringRef FName = DISubprogram(*I).getLinkageName(); - if (FName.empty()) - FName = DISubprogram(*I).getName(); - if (NamedMDNode *LVNMD = M.getNamedMetadata( - "llvm.dbg.lv." + Function::getRealLinkageName(FName))) - LVNMD->eraseFromParent(); - } + GlobalVariableChange = true; } + + // If we found dead subprograms or global variables, replace the current + // subprogram list/global variable list with our new live subprogram/global + // variable list. + if (SubprogramChange) { + // Make sure that 9 is still the index of the subprograms. This is to make + // sure that an assert is hit if the location of the subprogram array + // changes. This is just to make sure that this is updated if such an + // event occurs. + assert(DIC->getNumOperands() >= 10 && + SPs == DIC->getOperand(9) && + "DICompileUnits is expected to store Subprograms in operand " + "9."); + DIC->replaceOperandWith(9, MDNode::get(C, LiveSubprograms)); + Changed = true; + } + + if (GlobalVariableChange) { + // Make sure that 10 is still the index of global variables. This is to + // make sure that an assert is hit if the location of the subprogram array + // changes. This is just to make sure that this index is updated if such + // an event occurs. + assert(DIC->getNumOperands() >= 11 && + GVs == DIC->getOperand(10) && + "DICompileUnits is expected to store Global Variables in operand " + "10."); + DIC->replaceOperandWith(10, MDNode::get(C, LiveGlobalVariables)); + Changed = true; + } + + // Reset lists for the next iteration. + LiveSubprograms.clear(); + LiveGlobalVariables.clear(); } return Changed; -- cgit v1.1 From bbe88b7f0daf0027bf094d0e9fe36adc5243ca5d Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Fri, 23 Aug 2013 07:42:51 +0000 Subject: 80 cols git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189091 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 1bf6b99..232893d 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -561,9 +561,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) { FunctionType *NewFT = getInstrumentedABI() == IA_Args ? 
getArgsFunctionType(FT) : FT; - Function *NewF = - buildWrapperFunction(&F, std::string("dfsw$") + std::string(F.getName()), - GlobalValue::LinkOnceODRLinkage, NewFT); + Function *NewF = buildWrapperFunction( + &F, std::string("dfsw$") + std::string(F.getName()), + GlobalValue::LinkOnceODRLinkage, NewFT); if (getInstrumentedABI() == IA_TLS) NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs); -- cgit v1.1 From a8a7099c1849fcbb4a68642a292fd0250aa46505 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 23 Aug 2013 10:27:02 +0000 Subject: Turn MipsOptimizeMathLibCalls into a target-independent scalar transform ...so that it can be used for z too. Most of the code is the same. The only real change is to use TargetTransformInfo to test when a sqrt instruction is available. The pass is opt-in because at the moment it only handles sqrt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189097 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/CMakeLists.txt | 1 + lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 156 ++++++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 5 + 3 files changed, 162 insertions(+) create mode 100644 lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index f5d1db1..7fa7807 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp + PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp new file mode 100644 index 0000000..15cee44 --- /dev/null +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -0,0 +1,156 @@ +//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to partially inline the fast path of well-known library +// functions, such as using square-root instructions for cases where sqrt() +// does not need to set errno. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partially-inline-libcalls" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +namespace { + class PartiallyInlineLibCalls : public FunctionPass { + public: + static char ID; + + PartiallyInlineLibCalls() : + FunctionPass(ID) { + initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + + private: + /// Optimize calls to sqrt. 
+ bool optimizeSQRT(CallInst *Call, Function *CalledFunc, + BasicBlock &CurrBB, Function::iterator &BB); + }; + + char PartiallyInlineLibCalls::ID = 0; +} + +INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls", + "Partially inline calls to library functions", false, false) + +void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); +} + +bool PartiallyInlineLibCalls::runOnFunction(Function &F) { + bool Changed = false; + Function::iterator CurrBB; + TargetLibraryInfo *TLI = &getAnalysis(); + const TargetTransformInfo *TTI = &getAnalysis(); + for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) { + CurrBB = BB++; + + for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end(); + II != IE; ++II) { + CallInst *Call = dyn_cast(&*II); + Function *CalledFunc; + + if (!Call || !(CalledFunc = Call->getCalledFunction())) + continue; + + // Skip if function either has local linkage or is not a known library + // function. + LibFunc::Func LibFunc; + if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() || + !TLI->getLibFunc(CalledFunc->getName(), LibFunc)) + continue; + + switch (LibFunc) { + case LibFunc::sqrtf: + case LibFunc::sqrt: + if (TTI->haveFastSqrt(Call->getType()) && + optimizeSQRT(Call, CalledFunc, *CurrBB, BB)) + break; + continue; + default: + continue; + } + + Changed = true; + break; + } + } + + return Changed; +} + +bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call, + Function *CalledFunc, + BasicBlock &CurrBB, + Function::iterator &BB) { + // There is no need to change the IR, since backend will emit sqrt + // instruction if the call has already been marked read-only. + if (Call->onlyReadsMemory()) + return false; + + // Do the following transformation: + // + // (before) + // dst = sqrt(src) + // + // (after) + // v0 = sqrt_noreadmem(src) # native sqrt instruction. + // if (v0 is a NaN) + // v1 = sqrt(src) # library call. + // dst = phi(v0, v1) + // + + // Move all instructions following Call to newly created block JoinBB. + // Create phi and replace all uses. + BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this); + IRBuilder<> Builder(JoinBB, JoinBB->begin()); + PHINode *Phi = Builder.CreatePHI(Call->getType(), 2); + Call->replaceAllUsesWith(Phi); + + // Create basic block LibCallBB and insert a call to library function sqrt. + BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt", + CurrBB.getParent(), JoinBB); + Builder.SetInsertPoint(LibCallBB); + Instruction *LibCall = Call->clone(); + Builder.Insert(LibCall); + Builder.CreateBr(JoinBB); + + // Add attribute "readnone" so that backend can use a native sqrt instruction + // for this call. Insert a FP compare instruction and a conditional branch + // at the end of CurrBB. + Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone); + CurrBB.getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(&CurrBB); + Value *FCmp = Builder.CreateFCmpOEQ(Call, Call); + Builder.CreateCondBr(FCmp, JoinBB, LibCallBB); + + // Add phi operands. 
+ Phi->addIncoming(Call, &CurrBB); + Phi->addIncoming(LibCall, LibCallBB); + + BB = JoinBB; + return true; +} + +FunctionPass *llvm::createPartiallyInlineLibCallsPass() { + return new PartiallyInlineLibCalls(); +} diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 758334d..952811b 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -50,6 +50,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); + initializePartiallyInlineLibCallsPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); @@ -123,6 +124,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createMemCpyOptPass()); } +void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createPartiallyInlineLibCallsPass()); +} + void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createPromoteMemoryToRegisterPass()); } -- cgit v1.1 From 7c7b8e57f8f3d20610cdbf1888e2f0101752f986 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 23 Aug 2013 12:11:00 +0000 Subject: [msan] Fix handling of va_arg overflow area on x86_64. The code was erroneously reading overflow area shadow from the TLS slot, bypassing the local copy. Reading shadow directly from TLS is wrong, because it can be overwritten by a nested vararg call, if that happens before va_start. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index a78213d..ae73a2e 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1970,8 +1970,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr = MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); - Value *SrcPtr = - getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset); + Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset); IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); } } -- cgit v1.1 From f3c0314310f4c82de6467c4fe4ec4114b9281f04 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 23 Aug 2013 18:45:06 +0000 Subject: DataFlowSanitizer: correctly combine labels in the case where they are equal. 
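
combineShadows short-circuits at runtime when the two labels compare equal,
branching around the __dfsan_union call. The phi merging the two paths,
however, took ZeroShadow on the fast-path edge, so combining two equal
non-zero labels produced the zero (untainted) label and silently dropped
taint; the union of a label with itself must yield that label. The corrected
wiring, using the names visible in the hunk below (a sketch for orientation,
not the complete function):

    PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
    Phi->addIncoming(Call, Call->getParent()); // labels differed: union result
    Phi->addIncoming(V1, Head);                // labels equal: keep V1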
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189133 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 232893d..2fa3383 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -775,7 +775,7 @@ Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2,
     BasicBlock *Tail = BI->getSuccessor(0);
     PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin());
     Phi->addIncoming(Call, Call->getParent());
-    Phi->addIncoming(ZeroShadow, Head);
+    Phi->addIncoming(V1, Head);
     Pos = Phi;
     return Phi;
   } else {
-- cgit v1.1

From 0b6962f4be35aca7054ff68ef9bbbb2e03617d31 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sat, 24 Aug 2013 12:54:27 +0000
Subject: Add a function object to compare the first or second component of a
 std::pair.

Replace instances of this scattered around the code base.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189169 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 31 ++++--------------------
 1 file changed, 5 insertions(+), 26 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 368b1fa..8f6eee3 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -431,16 +431,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
   return true;
 }
 
-namespace {
-/// This is a helper predicate used to search by the first element of a pair.
-struct StoreIndexSearchPredicate {
-  bool operator()(const std::pair<unsigned, StoreInst *> &LHS,
-                  const std::pair<unsigned, StoreInst *> &RHS) {
-    return LHS.first < RHS.first;
-  }
-};
-}
-
 /// Many allocas are only used within a single basic block.  If this is the
 /// case, avoid traversing the CFG and inserting a lot of potentially useless
 /// PHI nodes by just performing a single linear pass over the basic block
@@ -473,8 +463,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
 
   // Sort the stores by their index, making it efficient to do a lookup with a
   // binary search.
-  std::sort(StoresByIndex.begin(), StoresByIndex.end(),
-            StoreIndexSearchPredicate());
+  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());
 
   // Walk all of the loads from this alloca, replacing them with the nearest
   // store above them, if any.
@@ -489,7 +478,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
     StoresByIndexTy::iterator I =
         std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                          std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
-                         StoreIndexSearchPredicate());
+                         less_first());
 
     if (I == StoresByIndex.begin())
       // If there is no store before this load, the load takes the undef value.
@@ -849,16 +838,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks(
   }
 }
 
-namespace {
-typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
-
-struct DomTreeNodeCompare {
-  bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
-    return LHS.second < RHS.second;
-  }
-};
-} // end anonymous namespace
-
 /// At this point, we're committed to promoting the alloca using IDF's, and the
 /// standard SSA construction algorithm. Determine which blocks need phi nodes
 /// and see if we can optimize out some work by avoiding insertion of dead phi
@@ -876,9 +855,9 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
 
   // Use a priority queue keyed on dominator tree level so that inserted nodes
   // are handled from the bottom of the dominator tree upwards.
-  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
-                              DomTreeNodeCompare> IDFPriorityQueue;
+  typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+                              less_second> IDFPriorityQueue;
   IDFPriorityQueue PQ;
 
   for (SmallPtrSet<BasicBlock *, 32>::const_iterator I = DefBlocks.begin(),
-- cgit v1.1

From ecc52e0c5b5a21f606e43febf2f65ca7daab1c97 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 24 Aug 2013 19:55:38 +0000
Subject: Check if in set on insertion instead of separately

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189179 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b1f097e..2610e24 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1161,13 +1161,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         ValueList Operands;
         BasicBlock *IBB = PH->getIncomingBlock(i);
 
-        if (VisitedBBs.count(IBB)) {
+        if (!VisitedBBs.insert(IBB)) {
           NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
           continue;
         }
 
-        VisitedBBs.insert(IBB);
-
         // Prepare the operand vector.
         for (unsigned j = 0; j < E->Scalars.size(); ++j)
           Operands.push_back(cast<Instruction>(E->Scalars[j])->
@@ -1851,9 +1849,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         break;
 
       // We may go through BB multiple times so skip the one we have checked.
-      if (VisitedInstrs.count(instr))
+      if (!VisitedInstrs.insert(instr))
         continue;
-      VisitedInstrs.insert(instr);
 
       // Stop constructing the list when you reach a different type.
       if (Incoming.size() && P->getType() != Incoming[0]->getType()) {
@@ -1879,9 +1876,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
     // We may go through BB multiple times so skip the one we have checked.
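     // (The set's insert() returns false when the element was already
     // present, so the membership test and the insertion fold into one call.)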
-    if (VisitedInstrs.count(it))
+    if (!VisitedInstrs.insert(it))
       continue;
-    VisitedInstrs.insert(it);
 
     if (isa<DbgInfoIntrinsic>(it))
       continue;
-- cgit v1.1

From 1b00d910058c31abb7cc5333b42cd380a3c8e128 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 26 Aug 2013 17:56:35 +0000
Subject: Vectorize starting from insertelements building a vector

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189233 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 60 ++++++++++++++++++++++++++++---
 1 file changed, 57 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2610e24..60749b4 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1132,16 +1132,21 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     return E->VectorizedValue;
   }
 
-  Type *ScalarTy = E->Scalars[0]->getType();
-  if (StoreInst *SI = dyn_cast<StoreInst>(E->Scalars[0]))
+  Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
+  Type *ScalarTy = VL0->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
     ScalarTy = SI->getValueOperand()->getType();
   VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
 
   if (E->NeedToGather) {
+    BasicBlock *BB = VL0->getParent();
+    BasicBlock::iterator NextInst = getLastInstruction(E->Scalars);
+    ++NextInst;
+    assert(NextInst != BB->end());
+    Builder.SetInsertPoint(NextInst);
     return Gather(E->Scalars, VecTy);
   }
 
-  Instruction *VL0 = cast<Instruction>(E->Scalars[0]);
   unsigned Opcode = VL0->getOpcode();
   assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
 
@@ -1835,6 +1840,40 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
   return 0;
 }
 
+/// \brief Recognize construction of vectors like
+///  %ra = insertelement <4 x float> undef, float %s0, i32 0
+///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+///  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+///  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+///
+/// Returns true if it matches
+///
+static bool findBuildVector(InsertElementInst *IE,
+                            SmallVectorImpl<Value *> &Ops) {
+  if (!isa<UndefValue>(IE->getOperand(0)))
+    return false;
+
+  while (true) {
+    Ops.push_back(IE->getOperand(1));
+
+    if (IE->use_empty())
+      return false;
+
+    InsertElementInst *NextUse = dyn_cast<InsertElementInst>(IE->use_back());
+    if (!NextUse)
+      return true;
+
+    // If this isn't the final use, make sure the next insertelement is the only
+    // use. It's OK if the final constructed vector is used multiple times
+    if (!IE->hasOneUse())
+      return false;
+
+    IE = NextUse;
+  }
+
+  return false;
+}
+
 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
   bool Changed = false;
   SmallVector<Value *, 4> Incoming;
@@ -1934,6 +1973,21 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       }
       continue;
    }
+
+    // Try to vectorize trees that start at insertelement instructions.
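+    // (findBuildVector gathers the inserted scalars into Ops; the whole
+    // chain is then vectorized as one bundle by tryToVectorizeList.)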
+    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
+      SmallVector<Value *, 8> Ops;
+      if (!findBuildVector(IE, Ops))
+        continue;
+
+      if (tryToVectorizeList(Ops, R)) {
+        Changed = true;
+        it = BB->begin();
+        e = BB->end();
+      }
+
+      continue;
+    }
   }
 
   return Changed;
-- cgit v1.1

From 6a804acc4ae77c014e4ef97c37f8e720ef360394 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 26 Aug 2013 17:56:38 +0000
Subject: Constify functions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 60749b4..0313e98 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -278,7 +278,7 @@ private:
 
   /// \returns the pointer to the vectorized value if \p VL is already
   /// vectorized, or NULL. They may happen in cycles.
-  Value *alreadyVectorized(ArrayRef<Value *> VL);
+  Value *alreadyVectorized(ArrayRef<Value *> VL) const;
 
   /// \brief Take the pointer operand from the Load/Store instruction.
   /// \returns NULL if this is not a valid Load/Store instruction.
@@ -319,7 +319,7 @@ private:
         NeedToGather(0) {}
 
     /// \returns true if the scalars in VL are equal to this entry.
-    bool isSame(ArrayRef<Value *> VL) {
+    bool isSame(ArrayRef<Value *> VL) const {
       assert(VL.size() == Scalars.size() && "Invalid size");
       for (int i = 0, e = VL.size(); i != e; ++i)
         if (VL[i] != Scalars[i])
@@ -1098,10 +1098,12 @@ Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
   return Vec;
 }
 
-Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) {
-  if (ScalarToTreeEntry.count(VL[0])) {
-    int Idx = ScalarToTreeEntry[VL[0]];
-    TreeEntry *En = &VectorizableTree[Idx];
+Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
+  SmallDenseMap<Value *, int>::const_iterator Entry
+    = ScalarToTreeEntry.find(VL[0]);
+  if (Entry != ScalarToTreeEntry.end()) {
+    int Idx = Entry->second;
+    const TreeEntry *En = &VectorizableTree[Idx];
     if (En->isSame(VL) && En->VectorizedValue)
       return En->VectorizedValue;
   }
-- cgit v1.1

From 57aa3aad33b50583d5a82735777d0f0dc03ff122 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 26 Aug 2013 18:38:29 +0000
Subject: Fix unused variable in release build

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189264 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0313e98..93376af 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1141,10 +1141,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
   VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
 
   if (E->NeedToGather) {
-    BasicBlock *BB = VL0->getParent();
     BasicBlock::iterator NextInst = getLastInstruction(E->Scalars);
     ++NextInst;
-    assert(NextInst != BB->end());
+    assert(NextInst != VL0->getParent()->end());
     Builder.SetInsertPoint(NextInst);
     return Gather(E->Scalars, VecTy);
   }
@@ -1270,7 +1269,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
 
       if (Value *V = alreadyVectorized(E->Scalars))
         return V;
-      
+
       Value *V = Builder.CreateSelect(Cond, True, False);
       E->VectorizedValue = V;
       return V;
-- cgit v1.1

From 551023c1e4596c7114b8c9ec8ca0fe87e06b10a1 Mon Sep 17 00:00:00 2001
From: Yi Jiang
Date: Mon, 26 Aug 2013
18:57:55 +0000 Subject: test commit. Remove blank line git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 93376af..23a876d 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1914,7 +1914,6 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { VisitedInstrs.clear(); for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { - // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(it)) continue; -- cgit v1.1 From 4bf6326d0871961a7402c797a0399a2bdaf53574 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 26 Aug 2013 22:33:26 +0000 Subject: LoopVectorize: Implement partial loop unrolling when vectorization is not profitable. This patch enables unrolling of loops when vectorization is legal but not profitable. We add a new class InnerLoopUnroller, that extends InnerLoopVectorizer and replaces some of the vector-specific logic with scalars. This patch does not introduce any runtime regressions and improves the following workloads: SingleSource/Benchmarks/Shootout/matrix -22.64% SingleSource/Benchmarks/Shootout-C++/matrix -13.06% External/SPEC/CINT2006/464_h264ref/464_h264ref -3.99% SingleSource/Benchmarks/Adobe-C++/simple_types_constant_folding -1.95% git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189281 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 662 +++++++++++++++++++++-------- 1 file changed, 495 insertions(+), 167 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 380c309..5b1e9b2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -126,6 +126,9 @@ static const unsigned MaxVectorWidth = 64; /// Maximum vectorization unroll count. static const unsigned MaxUnrollFactor = 16; +/// The cost of a loop that is considered 'small' by the unroller. +static const unsigned SmallLoopCost = 20; + namespace { // Forward declarations. @@ -167,7 +170,9 @@ public: updateAnalysis(); } -private: + virtual ~InnerLoopVectorizer() {} + +protected: /// A small list of PHINodes. typedef SmallVector PhiVector; /// When we unroll loops we have multiple vector values for each scalar. @@ -187,7 +192,13 @@ private: /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. - void vectorizeLoop(LoopVectorizationLegality *Legal); + virtual void vectorizeLoop(LoopVectorizationLegality *Legal); + + /// \brief The Loop exit block may have single value PHI nodes where the + /// incoming value is 'Undef'. While vectorizing we only handled real values + /// that were defined inside the loop. Here we fix the 'undef case'. + /// See PR14725. + void fixLCSSAPHIs(); /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* @@ -201,16 +212,23 @@ private: void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, PhiVector *PV); + /// Vectorize a single PHINode in a block. 
This method handles the induction + /// variable canonicalization. It supports both VF = 1 for unrolled loops and + /// arbitrary length vectors. + void widenPHIInstruction(Instruction *PN, VectorParts &Entry, + LoopVectorizationLegality *Legal, + unsigned UF, unsigned VF, PhiVector *PV); + /// Insert the new loop to the loop hierarchy and pass manager /// and update the analysis passes. void updateAnalysis(); /// This instruction is un-vectorizable. Implement it as a sequence /// of scalars. - void scalarizeInstruction(Instruction *Instr); + virtual void scalarizeInstruction(Instruction *Instr); /// Vectorize Load and Store instructions, - void vectorizeMemoryInstruction(Instruction *Instr, + virtual void vectorizeMemoryInstruction(Instruction *Instr, LoopVectorizationLegality *Legal); /// Create a broadcast instruction. This method generates a broadcast @@ -218,12 +236,12 @@ private: /// value. If this is the induction variable then we extend it to N, N+1, ... /// this is needed because each iteration in the loop corresponds to a SIMD /// element. - Value *getBroadcastInstrs(Value *V); + virtual Value *getBroadcastInstrs(Value *V); /// This function adds 0, 1, 2 ... to each vector element, starting at zero. /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...). /// The sequence starts at StartIndex. - Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); + virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); /// When we go over instructions in the basic block we rely on previous /// values within the current basic block or on loop invariant values. @@ -233,7 +251,7 @@ private: VectorParts &getVectorValue(Value *V); /// Generate a shuffle sequence that will reverse the vector Vec. - Value *reverseVector(Value *Vec); + virtual Value *reverseVector(Value *Vec); /// This is a helper class that holds the vectorizer state. It maps scalar /// instructions to vector instructions. When the code is 'unrolled' then @@ -291,6 +309,8 @@ private: /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. unsigned VF; + +protected: /// The vectorization unroll factor to use. Each scalar is vectorized to this /// many different vector instructions. unsigned UF; @@ -326,6 +346,23 @@ private: EdgeMaskCache MaskCache; }; +class InnerLoopUnroller : public InnerLoopVectorizer { +public: + InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, DataLayout *DL, + const TargetLibraryInfo *TLI, unsigned UnrollFactor) : + InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { } + +private: + virtual void vectorizeLoop(LoopVectorizationLegality *Legal); + virtual void scalarizeInstruction(Instruction *Instr); + virtual void vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality *Legal); + virtual Value *getBroadcastInstrs(Value *V); + virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate); + virtual Value *reverseVector(Value *Vec); +}; + /// \brief Look for a meaningful debug location on the instruction or it's /// operands. 
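 /// (Instructions synthesized during vectorization frequently carry no
 /// location of their own, hence the fallback to the operands.)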
static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { @@ -875,7 +912,7 @@ struct LoopVectorize : public LoopPass { LoopVectorizeHints Hints(L); - if (Hints.Width == 1) { + if (Hints.Width == 1 && Hints.Unroll == 1) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } @@ -914,16 +951,23 @@ struct LoopVectorize : public LoopPass { if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); - return false; } DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< F->getParent()->getModuleIdentifier()<<"\n"); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); - // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); - LB.vectorize(&LVL); + if (VF.Width == 1) { + if (UF == 1) + return false; + // We decided not to vectorize, but we may want to unroll. + InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); + Unroller.vectorize(&LVL); + } else { + // If we decided that it is *legal* to vectorize the loop then do it. + InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); + LB.vectorize(&LVL); + } // Mark the loop as already vectorized to avoid vectorizing again. Hints.setAlreadyVectorized(L); @@ -2136,11 +2180,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. + + fixLCSSAPHIs(); +} - // The Loop exit block may have single value PHI nodes where the incoming - // value is 'undef'. While vectorizing we only handled real values that - // were defined inside the loop. Here we handle the 'undef case'. - // See PR14725. +void InnerLoopVectorizer::fixLCSSAPHIs() { for (BasicBlock::iterator LEI = LoopExitBlock->begin(), LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { PHINode *LCSSAPhi = dyn_cast(LEI); @@ -2149,7 +2193,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()), LoopMiddleBlock); } -} +} InnerLoopVectorizer::VectorParts InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { @@ -2210,161 +2254,185 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { return BlockMask; } -void -InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, - BasicBlock *BB, PhiVector *PV) { - // For each instruction in the old loop. - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - VectorParts &Entry = WidenMap.get(it); - switch (it->getOpcode()) { - case Instruction::Br: - // Nothing to do for PHIs and BR, since we already took care of the - // loop control flow instructions. - continue; - case Instruction::PHI:{ - PHINode* P = cast(it); - // Handle reduction variables: - if (Legal->getReductionVars()->count(P)) { - for (unsigned part = 0; part < UF; ++part) { - // This is phase one of vectorizing PHIs. 
- Type *VecTy = VectorType::get(it->getType(), VF); - Entry[part] = PHINode::Create(VecTy, 2, "vec.phi", - LoopVectorBody-> getFirstInsertionPt()); - } - PV->push_back(P); - continue; - } +void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, + InnerLoopVectorizer::VectorParts &Entry, + LoopVectorizationLegality *Legal, + unsigned UF, unsigned VF, PhiVector *PV) { + PHINode* P = cast(PN); + // Handle reduction variables: + if (Legal->getReductionVars()->count(P)) { + for (unsigned part = 0; part < UF; ++part) { + // This is phase one of vectorizing PHIs. + Type *VecTy = (VF == 1) ? PN->getType() : + VectorType::get(PN->getType(), VF); + Entry[part] = PHINode::Create(VecTy, 2, "vec.phi", + LoopVectorBody-> getFirstInsertionPt()); + } + PV->push_back(P); + return; + } - setDebugLocFromInst(Builder, P); - // Check for PHI nodes that are lowered to vector selects. - if (P->getParent() != OrigLoop->getHeader()) { - // We know that all PHIs in non header blocks are converted into - // selects, so we don't have to worry about the insertion order and we - // can just use the builder. - // At this point we generate the predication tree. There may be - // duplications since this is a simple recursive scan, but future - // optimizations will clean it up. - - unsigned NumIncoming = P->getNumIncomingValues(); - - // Generate a sequence of selects of the form: - // SELECT(Mask3, In3, - // SELECT(Mask2, In2, - // ( ...))) - for (unsigned In = 0; In < NumIncoming; In++) { - VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), - P->getParent()); - VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); - - for (unsigned part = 0; part < UF; ++part) { - // We might have single edge PHIs (blocks) - use an identity - // 'select' for the first PHI operand. - if (In == 0) - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], - In0[part]); - else - // Select between the current value and the previous incoming edge - // based on the incoming mask. - Entry[part] = Builder.CreateSelect(Cond[part], In0[part], - Entry[part], "predphi"); - } - } - continue; + setDebugLocFromInst(Builder, P); + // Check for PHI nodes that are lowered to vector selects. + if (P->getParent() != OrigLoop->getHeader()) { + // We know that all PHIs in non header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + // At this point we generate the predication tree. There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + + unsigned NumIncoming = P->getNumIncomingValues(); + + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // ( ...))) + for (unsigned In = 0; In < NumIncoming; In++) { + VectorParts Cond = createEdgeMask(P->getIncomingBlock(In), + P->getParent()); + VectorParts &In0 = getVectorValue(P->getIncomingValue(In)); + + for (unsigned part = 0; part < UF; ++part) { + // We might have single edge PHIs (blocks) - use an identity + // 'select' for the first PHI operand. + if (In == 0) + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + In0[part]); + else + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Entry[part] = Builder.CreateSelect(Cond[part], In0[part], + Entry[part], "predphi"); } + } + return; + } - // This PHINode must be an induction variable. - // Make sure that we know about it. 
- assert(Legal->getInductionVars()->count(P) && - "Not an induction variable"); - - LoopVectorizationLegality::InductionInfo II = - Legal->getInductionVars()->lookup(P); - - switch (II.IK) { - case LoopVectorizationLegality::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case LoopVectorizationLegality::IK_IntInduction: { - assert(P->getType() == II.StartValue->getType() && "Types must match"); - Type *PhiTy = P->getType(); - Value *Broadcasted; - if (P == OldInduction) { - // Handle the canonical induction variable. We might have had to - // extend the type. - Broadcasted = Builder.CreateTrunc(Induction, PhiTy); - } else { - // Handle other induction variables that are now based on the - // canonical one. - Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, - "normalized.idx"); - NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); - Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, - "offset.idx"); - } - Broadcasted = getBroadcastInstrs(Broadcasted); - // After broadcasting the induction variable we need to make the vector - // consecutive by adding 0, 1, 2, etc. + // This PHINode must be an induction variable. + // Make sure that we know about it. + assert(Legal->getInductionVars()->count(P) && + "Not an induction variable"); + + LoopVectorizationLegality::InductionInfo II = + Legal->getInductionVars()->lookup(P); + + switch (II.IK) { + case LoopVectorizationLegality::IK_NoInduction: + llvm_unreachable("Unknown induction"); + case LoopVectorizationLegality::IK_IntInduction: { + assert(P->getType() == II.StartValue->getType() && "Types must match"); + Type *PhiTy = P->getType(); + Value *Broadcasted; + if (P == OldInduction) { + // Handle the canonical induction variable. We might have had to + // extend the type. + Broadcasted = Builder.CreateTrunc(Induction, PhiTy); + } else { + // Handle other induction variables that are now based on the + // canonical one. + Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, + "normalized.idx"); + NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); + Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx, + "offset.idx"); + } + Broadcasted = getBroadcastInstrs(Broadcasted); + // After broadcasting the induction variable we need to make the vector + // consecutive by adding 0, 1, 2, etc. + for (unsigned part = 0; part < UF; ++part) + Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); + return; + } + case LoopVectorizationLegality::IK_ReverseIntInduction: + case LoopVectorizationLegality::IK_PtrInduction: + case LoopVectorizationLegality::IK_ReversePtrInduction: + // Handle reverse integer and pointer inductions. + Value *StartIdx = ExtendedIdx; + // This is the normalized GEP that starts counting at zero. + Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, + "normalized.idx"); + + // Handle the reverse integer induction variable case. + if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) { + IntegerType *DstTy = cast(II.StartValue->getType()); + Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, + "resize.norm.idx"); + Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, + "reverse.idx"); + + // This is a new value so do not hoist it out. + Value *Broadcasted = getBroadcastInstrs(ReverseInd); + // After broadcasting the induction variable we need to make the + // vector consecutive by adding ... -3, -2, -1, 0. 
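+      // (With VF == 1 in the unroller, each part contributes a single scalar
+      // step of -part rather than a vector of lane offsets.)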
for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false); - continue; + Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, + true); + return; } - case LoopVectorizationLegality::IK_ReverseIntInduction: - case LoopVectorizationLegality::IK_PtrInduction: - case LoopVectorizationLegality::IK_ReversePtrInduction: - // Handle reverse integer and pointer inductions. - Value *StartIdx = ExtendedIdx; - // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, - "normalized.idx"); - // Handle the reverse integer induction variable case. - if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) { - IntegerType *DstTy = cast(II.StartValue->getType()); - Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy, - "resize.norm.idx"); - Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI, - "reverse.idx"); - - // This is a new value so do not hoist it out. - Value *Broadcasted = getBroadcastInstrs(ReverseInd); - // After broadcasting the induction variable we need to make the - // vector consecutive by adding ... -3, -2, -1, 0. - for (unsigned part = 0; part < UF; ++part) - Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part, - true); + // Handle the pointer induction variable case. + assert(P->getType()->isPointerTy() && "Unexpected type."); + + // Is this a reverse induction ptr or a consecutive induction ptr. + bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == + II.IK); + + // This is the vector of results. Notice that we don't generate + // vector geps because scalar geps result in better code. + for (unsigned part = 0; part < UF; ++part) { + if (VF == 1) { + int EltIndex = (part) * (Reverse ? -1 : 1); + Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); + Value *GlobalIdx; + if (Reverse) + GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); + else + GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); + + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, + "next.gep"); + Entry[part] = SclrGep; continue; } - // Handle the pointer induction variable case. - assert(P->getType()->isPointerTy() && "Unexpected type."); - - // Is this a reverse induction ptr or a consecutive induction ptr. - bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == - II.IK); - - // This is the vector of results. Notice that we don't generate - // vector geps because scalar geps result in better code. - for (unsigned part = 0; part < UF; ++part) { - Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); - for (unsigned int i = 0; i < VF; ++i) { - int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); - Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); - Value *GlobalIdx; - if (!Reverse) - GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); - else - GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); - - Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, - "next.gep"); - VecVal = Builder.CreateInsertElement(VecVal, SclrGep, - Builder.getInt32(i), - "insert.gep"); - } - Entry[part] = VecVal; + Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); + for (unsigned int i = 0; i < VF; ++i) { + int EltIndex = (i + part * VF) * (Reverse ? 
-1 : 1); + Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); + Value *GlobalIdx; + if (!Reverse) + GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); + else + GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); + + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, + "next.gep"); + VecVal = Builder.CreateInsertElement(VecVal, SclrGep, + Builder.getInt32(i), + "insert.gep"); } - continue; + Entry[part] = VecVal; } + return; + } +} +void +InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, + BasicBlock *BB, PhiVector *PV) { + // For each instruction in the old loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + VectorParts &Entry = WidenMap.get(it); + switch (it->getOpcode()) { + case Instruction::Br: + // Nothing to do for PHIs and BR, since we already took care of the + // loop control flow instructions. + continue; + case Instruction::PHI:{ + // Vectorize PHINodes. + widenPHIInstruction(it, Entry, Legal, UF, VF, PV); + continue; }// End of PHI. case Instruction::Add: @@ -2423,8 +2491,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, VectorParts &Cond = getVectorValue(it->getOperand(0)); VectorParts &Op0 = getVectorValue(it->getOperand(1)); VectorParts &Op1 = getVectorValue(it->getOperand(2)); - Value *ScalarCond = Builder.CreateExtractElement(Cond[0], - Builder.getInt32(0)); + + Value *ScalarCond = (VF == 1) ? Cond[0] : + Builder.CreateExtractElement(Cond[0], Builder.getInt32(0)); + for (unsigned Part = 0; Part < UF; ++Part) { Entry[Part] = Builder.CreateSelect( InvariantCond ? ScalarCond : Cond[Part], @@ -2485,7 +2555,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, break; } /// Vectorize casts. - Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); + Type *DestTy = (VF == 1) ? CI->getType() : + VectorType::get(CI->getType(), VF); VectorParts &A = getVectorValue(it->getOperand(0)); for (unsigned Part = 0; Part < UF; ++Part) @@ -2515,7 +2586,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); Args.push_back(Arg[Part]); } - Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) }; + Type *Tys[] = {CI->getType()}; + if (VF > 1) + Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF); + Function *F = Intrinsic::getDeclaration(M, ID, Tys); Entry[Part] = Builder.CreateCall(F, Args); } @@ -4267,7 +4341,19 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, else if (UF < 1) UF = 1; - if (Legal->getReductionVars()->size()) { + bool HasReductions = Legal->getReductionVars()->size(); + + // Decide if we want to unroll if we decided that it is legal to vectorize + // but not profitable. + if (VF == 1) { + if (TheLoop->getNumBlocks() > 1 || !HasReductions || + LoopCost > SmallLoopCost) + return 1; + + return UF; + } + + if (HasReductions) { DEBUG(dbgs() << "LV: Unrolling because of reductions. \n"); return UF; } @@ -4277,9 +4363,9 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // to estimate the cost of the loop and unroll until the cost of the // loop overhead is about 5% of the cost of the loop. DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n"); - if (LoopCost < 20) { + if (LoopCost < SmallLoopCost) { DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. 
\n"); - unsigned NewUF = 20/LoopCost + 1; + unsigned NewUF = SmallLoopCost / (LoopCost + 1); return std::min(NewUF, UF); } @@ -4701,3 +4787,245 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } + +void +InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) { + // In order to support reduction variables we need to be able to unroll + // Phi nodes. Phi nodes have cycles, so we need to unroll them in two + // stages. See InnerLoopVectorizer::vectorizeLoop for more details. + PhiVector RdxPHIsToFix; + + // Scan the loop in a topological order to ensure that defs are vectorized + // before users. + LoopBlocksDFS DFS(OrigLoop); + DFS.perform(LI); + + // Unroll all of the blocks in the original loop. + for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); + bb != be; ++bb) + vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix); + + // Create the 'reduced' values for each of the induction vars. + // The reduced values are the vector values that we scalarize and combine + // after the loop is finished. + for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end(); + it != e; ++it) { + PHINode *RdxPhi = *it; + assert(RdxPhi && "Unable to recover vectorized PHI"); + + // Find the reduction variable descriptor. + assert(Legal->getReductionVars()->count(RdxPhi) && + "Unable to find the reduction variable"); + LoopVectorizationLegality::ReductionDescriptor RdxDesc = + (*Legal->getReductionVars())[RdxPhi]; + + setDebugLocFromInst(Builder, RdxDesc.StartValue); + + // We need to generate a reduction vector from the incoming scalar. + // To do so, we need to generate the 'identity' vector and overide + // one of the elements with the incoming scalar reduction. We need + // to do it in the vector-loop preheader. + Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); + + // This is the vector-clone of the value that leaves the loop. + VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); + Type *VecTy = VectorExit[0]->getType(); + + // Find the reduction identity variable. Zero for addition, or, xor, + // one for multiplication, -1 for And. + Value *Identity; + Value *VectorStart; + if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || + RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { + // MinMax reduction have the start value as their identify. + VectorStart = Identity = RdxDesc.StartValue; + + } else { + Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, + VecTy->getScalarType()); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = RdxDesc.StartValue; + } + + // Fix the vector-loop phi. + // We created the induction variable so we know that the + // preheader is the first entry. + BasicBlock *VecPreheader = Induction->getIncomingBlock(0); + + // Reductions do not have to start at zero. They can start with + // any loop invariant values. + VectorParts &VecRdxPhi = WidenMap.get(RdxPhi); + BasicBlock *Latch = OrigLoop->getLoopLatch(); + Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch); + VectorParts &Val = getVectorValue(LoopVal); + for (unsigned part = 0; part < UF; ++part) { + // Make sure to add the reduction stat value only to the + // first unroll part. + Value *StartVal = (part == 0) ? 
VectorStart : Identity; + cast(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader); + cast(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody); + } + + // Before each round, move the insertion point right between + // the PHIs and the values we are going to write. + // This allows us to write both PHINodes and the extractelement + // instructions. + Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); + + VectorParts RdxParts; + setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr); + for (unsigned part = 0; part < UF; ++part) { + // This PHINode contains the vectorized reduction variable, or + // the initial value vector, if we bypass the vector loop. + VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr); + PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); + Value *StartVal = (part == 0) ? VectorStart : Identity; + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); + NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody); + RdxParts.push_back(NewPhi); + } + + // Reduce all of the unrolled parts into a single vector. + Value *ReducedPartRdx = RdxParts[0]; + unsigned Op = getReductionBinOp(RdxDesc.Kind); + setDebugLocFromInst(Builder, ReducedPartRdx); + for (unsigned part = 1; part < UF; ++part) { + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, + RdxParts[part], ReducedPartRdx, + "bin.rdx"); + else + ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind, + ReducedPartRdx, RdxParts[part]); + } + + // Now, we need to fix the users of the reduction variable + // inside and outside of the scalar remainder loop. + // We know that the loop is in LCSSA form. We need to update the + // PHI nodes in the exit blocks. + for (BasicBlock::iterator LEI = LoopExitBlock->begin(), + LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { + PHINode *LCSSAPhi = dyn_cast(LEI); + if (!LCSSAPhi) continue; + + // All PHINodes need to have a single entry edge, or two if + // we already fixed them. + assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); + + // We found our reduction value exit-PHI. Update it with the + // incoming bypass edge. + if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) { + // Add an edge coming from the bypass. + LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); + break; + } + }// end of the LCSSA phi scan. + + // Fix the scalar loop reduction variable with the incoming reduction sum + // from the vector body and from the backedge value. + int IncomingEdgeBlockIdx = + (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch()); + assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); + // Pick the other block. + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); + (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx); + (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); + }// end of for each redux variable. + + fixLCSSAPHIs(); +} + +void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) { + assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); + // Holds vector parameters or scalars, in case of uniform vals. + SmallVector Params; + + setDebugLocFromInst(Builder, Instr); + + // Find all of the vectorized parameters. + for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { + Value *SrcOp = Instr->getOperand(op); + + // If we are accessing the old induction variable, use the new one. 
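+    // (getVectorValue below yields UF scalar copies, one per unroll part,
+    // rather than UF wide vectors, since the unroller runs with VF == 1.)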
+ if (SrcOp == OldInduction) { + Params.push_back(getVectorValue(SrcOp)); + continue; + } + + // Try using previously calculated values. + Instruction *SrcInst = dyn_cast(SrcOp); + + // If the src is an instruction that appeared earlier in the basic block + // then it should already be vectorized. + if (SrcInst && OrigLoop->contains(SrcInst)) { + assert(WidenMap.has(SrcInst) && "Source operand is unavailable"); + // The parameter is a vector value from earlier. + Params.push_back(WidenMap.get(SrcInst)); + } else { + // The parameter is a scalar from outside the loop. Maybe even a constant. + VectorParts Scalars; + Scalars.append(UF, SrcOp); + Params.push_back(Scalars); + } + } + + assert(Params.size() == Instr->getNumOperands() && + "Invalid number of operands"); + + // Does this instruction return a value ? + bool IsVoidRetTy = Instr->getType()->isVoidTy(); + + Value *UndefVec = IsVoidRetTy ? 0 : + UndefValue::get(Instr->getType()); + // Create a new entry in the WidenMap and initialize it to Undef or Null. + VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); + + // For each vector unroll 'part': + for (unsigned Part = 0; Part < UF; ++Part) { + // For each scalar that we create: + + Instruction *Cloned = Instr->clone(); + if (!IsVoidRetTy) + Cloned->setName(Instr->getName() + ".cloned"); + // Replace the operands of the cloned instrucions with extracted scalars. + for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { + Value *Op = Params[op][Part]; + Cloned->setOperand(op, Op); + } + + // Place the cloned scalar in the new loop. + Builder.Insert(Cloned); + + // If the original scalar returns a value we need to place it in a vector + // so that future users will be able to use it. + if (!IsVoidRetTy) + VecResults[Part] = Cloned; + } +} + +void +InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality*) { + return scalarizeInstruction(Instr); +} + +Value *InnerLoopUnroller::reverseVector(Value *Vec) { + return Vec; +} + +Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { + return V; +} + +Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx, + bool Negate) { + // When unrolling and the VF is 1, we only need to add a simple scalar. + Type *ITy = Val->getType(); + assert(!ITy->isVectorTy() && "Val must be a scalar"); + Constant *C = ConstantInt::get(ITy, StartIdx, Negate); + return Builder.CreateAdd(Val, C, "induction"); +} + -- cgit v1.1 From 4b272493b7deaa139484b16ee45ae87a46ceef57 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 26 Aug 2013 23:08:37 +0000 Subject: Fix inserting instructions before last in bundle. The builder inserts from before the insert point, not after, so this would insert before the last instruction in the bundle instead of after it. I'm not sure if this can actually be a problem with any of the current insertions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189285 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 36 +++++++++++++++++------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 23a876d..57cd2a7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -311,6 +311,10 @@ private: /// \returns the Instruction in the bundle \p VL. 
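   /// ("Last" means latest in program order within the bundle's basic block.)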
Instruction *getLastInstruction(ArrayRef VL); + /// \brief Set the Builder insert point to one after the last instruction in + /// the bundle + void setInsertPointAfterBundle(ArrayRef VL); + /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); @@ -1068,6 +1072,15 @@ Instruction *BoUpSLP::getLastInstruction(ArrayRef VL) { return I; } +void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL) { + Instruction *VL0 = cast(VL[0]); + Instruction *LastInst = getLastInstruction(VL); + BasicBlock::iterator NextInst = LastInst; + ++NextInst; + Builder.SetInsertPoint(VL0->getParent(), NextInst); + Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); +} + Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { Value *Vec = UndefValue::get(Ty); // Generate the 'InsertElement' instruction. @@ -1141,10 +1154,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size()); if (E->NeedToGather) { - BasicBlock::iterator NextInst = getLastInstruction(E->Scalars); - ++NextInst; - assert(NextInst != VL0->getParent()->end()); - Builder.SetInsertPoint(NextInst); + setInsertPointAfterBundle(E->Scalars); return Gather(E->Scalars, VecTy); } @@ -1212,8 +1222,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { for (int i = 0, e = E->Scalars.size(); i < e; ++i) INVL.push_back(cast(E->Scalars[i])->getOperand(0)); - Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); Value *InVec = vectorizeTree(INVL); @@ -1233,8 +1242,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { RHSV.push_back(cast(E->Scalars[i])->getOperand(1)); } - Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); Value *L = vectorizeTree(LHSV); Value *R = vectorizeTree(RHSV); @@ -1260,8 +1268,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { FalseVec.push_back(cast(E->Scalars[i])->getOperand(2)); } - Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); Value *Cond = vectorizeTree(CondVec); Value *True = vectorizeTree(TrueVec); @@ -1298,8 +1305,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); } - Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); @@ -1319,8 +1325,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Load: { // Loads are inserted at the head of the tree because we don't want to // sink them all the way down past store instructions. 
- Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); LoadInst *LI = cast(VL0); Value *VecPtr = @@ -1339,8 +1344,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { for (int i = 0, e = E->Scalars.size(); i < e; ++i) ValueOp.push_back(cast(E->Scalars[i])->getValueOperand()); - Builder.SetInsertPoint(getLastInstruction(E->Scalars)); - Builder.SetCurrentDebugLocation(VL0->getDebugLoc()); + setInsertPointAfterBundle(E->Scalars); Value *VecValue = vectorizeTree(ValueOp); Value *VecPtr = -- cgit v1.1 From 442d5f6c4b2395914c5970012916ce3746a39c39 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 27 Aug 2013 04:43:03 +0000 Subject: Fixed typo. Noticed by Stephen Checkoway . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189312 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/StripSymbols.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index fe9667e..1792aa8 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -345,7 +345,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { LLVMContext &C = M.getContext(); // Find all debug info in F. This is actually overkill in terms of what we - // want to do, but we want to try and be as redundent as possible in the face + // want to do, but we want to try and be as resilient as possible in the face // of potential debug info changes by using the formal interfaces given to us // as much as possible. DebugInfoFinder F; -- cgit v1.1 From bd28f5c8562d537031cc50979bc1609cadbfaad2 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 27 Aug 2013 18:52:47 +0000 Subject: Refactor 'vectorizeLoop' no functionality change. This patch merges LoopVectorize of InnerLoopVectorizer and InnerLoopUnroller by adding checks for VF=1. This helps in erasing the Unroller code that is almost identical to the InnerLoopVectorizer code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189391 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 240 +++++++---------------------- 1 file changed, 53 insertions(+), 187 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5b1e9b2..2ee1441 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -354,7 +354,6 @@ public: InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { } private: - virtual void vectorizeLoop(LoopVectorizationLegality *Legal); virtual void scalarizeInstruction(Instruction *Instr); virtual void vectorizeMemoryInstruction(Instruction *Instr, LoopVectorizationLegality *Legal); @@ -2049,18 +2048,31 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { // MinMax reduction have the start value as their identify. 
- VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue, - "minmax.ident"); + if (VF == 1) { + VectorStart = Identity = RdxDesc.StartValue; + } else { + VectorStart = Identity = Builder.CreateVectorSplat(VF, + RdxDesc.StartValue, + "minmax.ident"); + } } else { + // Handle other reduction kinds: Constant *Iden = - LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, - VecTy->getScalarType()); - Identity = ConstantVector::getSplat(VF, Iden); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - VectorStart = Builder.CreateInsertElement(Identity, - RdxDesc.StartValue, Zero); + LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, + VecTy->getScalarType()); + if (VF == 1) { + Identity = Iden; + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = RdxDesc.StartValue; + } else { + Identity = ConstantVector::getSplat(VF, Iden); + + // This vector is the Identity vector where the first element is the + // incoming scalar reduction. + VectorStart = Builder.CreateInsertElement(Identity, + RdxDesc.StartValue, Zero); + } } // Fix the vector-loop phi. @@ -2116,37 +2128,40 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { ReducedPartRdx, RdxParts[part]); } - // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles - // and vector ops, reducing the set of values being computed by half each - // round. - assert(isPowerOf2_32(VF) && - "Reduction emission only supported for pow2 vectors!"); - Value *TmpVec = ReducedPartRdx; - SmallVector ShuffleMask(VF, 0); - for (unsigned i = VF; i != 1; i >>= 1) { - // Move the upper half of the vector to the lower half. - for (unsigned j = 0; j != i/2; ++j) - ShuffleMask[j] = Builder.getInt32(i/2 + j); - - // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i/2], ShuffleMask.end(), - UndefValue::get(Builder.getInt32Ty())); - - Value *Shuf = + if (VF > 1) { + // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles + // and vector ops, reducing the set of values being computed by half each + // round. + assert(isPowerOf2_32(VF) && + "Reduction emission only supported for pow2 vectors!"); + Value *TmpVec = ReducedPartRdx; + SmallVector ShuffleMask(VF, 0); + for (unsigned i = VF; i != 1; i >>= 1) { + // Move the upper half of the vector to the lower half. + for (unsigned j = 0; j != i/2; ++j) + ShuffleMask[j] = Builder.getInt32(i/2 + j); + + // Fill the rest of the mask with undef. + std::fill(&ShuffleMask[i/2], ShuffleMask.end(), + UndefValue::get(Builder.getInt32Ty())); + + Value *Shuf = Builder.CreateShuffleVector(TmpVec, UndefValue::get(TmpVec->getType()), ConstantVector::get(ShuffleMask), "rdx.shuf"); - if (Op != Instruction::ICmp && Op != Instruction::FCmp) - TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, - "bin.rdx"); - else - TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf); - } + if (Op != Instruction::ICmp && Op != Instruction::FCmp) + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); + else + TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf); + } - // The result is in the first element of the vector. - Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); + // The result is in the first element of the vector. 
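+      // (The log2(VF) shuffle-and-combine rounds above have folded the whole
+      // vector into lane 0, so a single extract completes the reduction.)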
+ ReducedPartRdx = Builder.CreateExtractElement(TmpVec, + Builder.getInt32(0)); + } // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. @@ -2165,7 +2180,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // incoming bypass edge. if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) { // Add an edge coming from the bypass. - LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock); + LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); break; } }// end of the LCSSA phi scan. @@ -2177,7 +2192,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); // Pick the other block. int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); + (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. @@ -4788,155 +4803,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } -void -InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) { - // In order to support reduction variables we need to be able to unroll - // Phi nodes. Phi nodes have cycles, so we need to unroll them in two - // stages. See InnerLoopVectorizer::vectorizeLoop for more details. - PhiVector RdxPHIsToFix; - - // Scan the loop in a topological order to ensure that defs are vectorized - // before users. - LoopBlocksDFS DFS(OrigLoop); - DFS.perform(LI); - - // Unroll all of the blocks in the original loop. - for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); - bb != be; ++bb) - vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix); - - // Create the 'reduced' values for each of the induction vars. - // The reduced values are the vector values that we scalarize and combine - // after the loop is finished. - for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end(); - it != e; ++it) { - PHINode *RdxPhi = *it; - assert(RdxPhi && "Unable to recover vectorized PHI"); - - // Find the reduction variable descriptor. - assert(Legal->getReductionVars()->count(RdxPhi) && - "Unable to find the reduction variable"); - LoopVectorizationLegality::ReductionDescriptor RdxDesc = - (*Legal->getReductionVars())[RdxPhi]; - - setDebugLocFromInst(Builder, RdxDesc.StartValue); - - // We need to generate a reduction vector from the incoming scalar. - // To do so, we need to generate the 'identity' vector and overide - // one of the elements with the incoming scalar reduction. We need - // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); - - // This is the vector-clone of the value that leaves the loop. - VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); - Type *VecTy = VectorExit[0]->getType(); - - // Find the reduction identity variable. Zero for addition, or, xor, - // one for multiplication, -1 for And. - Value *Identity; - Value *VectorStart; - if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || - RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { - // MinMax reduction have the start value as their identify. 
- VectorStart = Identity = RdxDesc.StartValue; - - } else { - Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, - VecTy->getScalarType()); - - // This vector is the Identity vector where the first element is the - // incoming scalar reduction. - VectorStart = RdxDesc.StartValue; - } - - // Fix the vector-loop phi. - // We created the induction variable so we know that the - // preheader is the first entry. - BasicBlock *VecPreheader = Induction->getIncomingBlock(0); - - // Reductions do not have to start at zero. They can start with - // any loop invariant values. - VectorParts &VecRdxPhi = WidenMap.get(RdxPhi); - BasicBlock *Latch = OrigLoop->getLoopLatch(); - Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch); - VectorParts &Val = getVectorValue(LoopVal); - for (unsigned part = 0; part < UF; ++part) { - // Make sure to add the reduction stat value only to the - // first unroll part. - Value *StartVal = (part == 0) ? VectorStart : Identity; - cast(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader); - cast(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody); - } - - // Before each round, move the insertion point right between - // the PHIs and the values we are going to write. - // This allows us to write both PHINodes and the extractelement - // instructions. - Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); - - VectorParts RdxParts; - setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr); - for (unsigned part = 0; part < UF; ++part) { - // This PHINode contains the vectorized reduction variable, or - // the initial value vector, if we bypass the vector loop. - VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr); - PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); - Value *StartVal = (part == 0) ? VectorStart : Identity; - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) - NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); - NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody); - RdxParts.push_back(NewPhi); - } - - // Reduce all of the unrolled parts into a single vector. - Value *ReducedPartRdx = RdxParts[0]; - unsigned Op = getReductionBinOp(RdxDesc.Kind); - setDebugLocFromInst(Builder, ReducedPartRdx); - for (unsigned part = 1; part < UF; ++part) { - if (Op != Instruction::ICmp && Op != Instruction::FCmp) - ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, - RdxParts[part], ReducedPartRdx, - "bin.rdx"); - else - ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind, - ReducedPartRdx, RdxParts[part]); - } - - // Now, we need to fix the users of the reduction variable - // inside and outside of the scalar remainder loop. - // We know that the loop is in LCSSA form. We need to update the - // PHI nodes in the exit blocks. - for (BasicBlock::iterator LEI = LoopExitBlock->begin(), - LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { - PHINode *LCSSAPhi = dyn_cast(LEI); - if (!LCSSAPhi) continue; - - // All PHINodes need to have a single entry edge, or two if - // we already fixed them. - assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI"); - - // We found our reduction value exit-PHI. Update it with the - // incoming bypass edge. - if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) { - // Add an edge coming from the bypass. - LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); - break; - } - }// end of the LCSSA phi scan. 
- - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = - (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch()); - assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); - // Pick the other block. - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx); - (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); - }// end of for each redux variable. - - fixLCSSAPHIs(); -} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); -- cgit v1.1 From ffba4c7e69cd0d0ef346e9845b918a030ca51ae8 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 27 Aug 2013 22:09:06 +0000 Subject: DataFlowSanitizer: Implement trampolines for function pointers passed to custom functions. Differential Revision: http://llvm-reviews.chandlerc.com/D1503 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189408 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/DataFlowSanitizer.cpp | 80 +++++++++++++++++++++- 1 file changed, 77 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2fa3383..8ee5482 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IRBuilder.h" @@ -182,6 +183,7 @@ class DataFlowSanitizer : public ModulePass { bool isInstrumented(const Function *F); bool isInstrumented(const GlobalAlias *GA); FunctionType *getArgsFunctionType(FunctionType *T); + FunctionType *getTrampolineFunctionType(FunctionType *T); FunctionType *getCustomFunctionType(FunctionType *T); InstrumentedABI getInstrumentedABI(); WrapperKind getWrapperKind(Function *F); @@ -189,6 +191,7 @@ class DataFlowSanitizer : public ModulePass { Function *buildWrapperFunction(Function *F, StringRef NewFName, GlobalValue::LinkageTypes NewFLink, FunctionType *NewFT); + Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); public: DataFlowSanitizer(StringRef ABIListFile = StringRef(), @@ -288,9 +291,10 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } -FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { +FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { assert(!T->isVarArg()); llvm::SmallVector ArgTypes; + ArgTypes.push_back(T->getPointerTo()); std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) ArgTypes.push_back(ShadowTy); @@ -300,6 +304,27 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { return FunctionType::get(T->getReturnType(), ArgTypes, false); } +FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { + assert(!T->isVarArg()); + llvm::SmallVector ArgTypes; + for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); i != e; ++i) { + FunctionType *FT; + if (isa(*i) && + (FT = dyn_cast(cast(*i)->getElementType()))) 
{ + ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); + ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); + } else { + ArgTypes.push_back(*i); + } + } + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return FunctionType::get(T->getReturnType(), ArgTypes, false); +} + bool DataFlowSanitizer::doInitialization(Module &M) { DL = getAnalysisIfAvailable(); if (!DL) @@ -417,6 +442,39 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, return NewF; } +Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, + StringRef FName) { + FunctionType *FTT = getTrampolineFunctionType(FT); + Constant *C = Mod->getOrInsertFunction(FName, FTT); + Function *F = dyn_cast(C); + if (F && F->isDeclaration()) { + F->setLinkage(GlobalValue::LinkOnceODRLinkage); + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + std::vector Args; + Function::arg_iterator AI = F->arg_begin(); ++AI; + for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) + Args.push_back(&*AI); + CallInst *CI = + CallInst::Create(&F->getArgumentList().front(), Args, "", BB); + ReturnInst *RI; + if (FT->getReturnType()->isVoidTy()) + RI = ReturnInst::Create(*Ctx, BB); + else + RI = ReturnInst::Create(*Ctx, CI, BB); + + DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); + Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; + for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) + DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSanVisitor(DFSF).visitCallInst(*CI); + if (!FT->getReturnType()->isVoidTy()) + new StoreInst(DFSF.getShadow(RI->getReturnValue()), + &F->getArgumentList().back(), RI); + } + + return C; +} + bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; @@ -1181,8 +1239,24 @@ void DFSanVisitor::visitCallSite(CallSite CS) { std::vector Args; CallSite::arg_iterator i = CS.arg_begin(); - for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) - Args.push_back(*i); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { + Type *T = (*i)->getType(); + FunctionType *ParamFT; + if (isa(T) && + (ParamFT = dyn_cast( + cast(T)->getElementType()))) { + std::string TName = "dfst"; + TName += utostr(FT->getNumParams() - n); + TName += "$"; + TName += F->getName(); + Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); + Args.push_back(T); + Args.push_back( + IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); + } else { + Args.push_back(*i); + } + } i = CS.arg_begin(); for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) -- cgit v1.1 From f1db2a6a0e0ca2e0f03169ba4dc7c8332bd6ed74 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 28 Aug 2013 11:25:12 +0000 Subject: 80 cols git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189473 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 8ee5482..9b9e725 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -307,10 +307,11 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { 
assert(!T->isVarArg()); llvm::SmallVector ArgTypes; - for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); i != e; ++i) { + for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); + i != e; ++i) { FunctionType *FT; - if (isa(*i) && - (FT = dyn_cast(cast(*i)->getElementType()))) { + if (isa(*i) && (FT = dyn_cast(cast( + *i)->getElementType()))) { ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); } else { -- cgit v1.1 From 435798e96a64738b55a01055dde1bc9a88a15191 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 28 Aug 2013 18:33:10 +0000 Subject: Disable unrolling in the loop vectorizer when disabled in the pass manager When unrolling is disabled in the pass manager, the loop vectorizer should also not unroll loops. This will allow the -fno-unroll-loops option in Clang to behave as expected (even for vectorizable loops). The loop vectorizer's -force-vector-unroll option will (continue to) override the pass-manager setting (including -force-vector-unroll=0 to force use of the internal auto-selection logic). In order to test this, I added a flag to opt (-disable-loop-unrolling) to force disable unrolling through opt (the analog of -fno-unroll-loops in Clang). Also, this fixes a small bug in opt where the loop vectorizer was enabled only after the pass manager populated the queue of passes (the global_alias.ll test needed a slight update to the RUN line as a result of this fix). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 4 ++-- lib/Transforms/Vectorize/LoopVectorize.cpp | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 743dc42..d4c0c2c 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -196,7 +196,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopDeletionPass()); // Delete dead loops if (!LateVectorize && LoopVectorize) - MPM.add(createLoopVectorizePass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops)); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops @@ -250,7 +250,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Add the various vectorization passes and relevant cleanup passes for // them since we are no longer in the middle of the main scalar pipeline. if (LoopVectorize) { - MPM.add(createLoopVectorizePass()); + MPM.add(createLoopVectorizePass(DisableUnrollLoops)); if (!DisableUnrollLoops) MPM.add(createLoopUnrollPass()); // Unroll small loops diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2ee1441..0afc73e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -761,9 +761,9 @@ struct LoopVectorizeHints { /// Vectorization unroll factor. unsigned Unroll; - LoopVectorizeHints(const Loop *L) + LoopVectorizeHints(const Loop *L, bool DisableUnrolling) : Width(VectorizationFactor) - , Unroll(VectorizationUnroll) + , Unroll(DisableUnrolling ? 
1 : VectorizationUnroll) , LoopID(L->getLoopID()) { getHints(L); // The command line options override any loop metadata except for when @@ -772,6 +772,9 @@ struct LoopVectorizeHints { Width = VectorizationFactor; if (VectorizationUnroll.getNumOccurrences() > 0) Unroll = VectorizationUnroll; + + DEBUG(if (DisableUnrolling && Unroll == 1) + dbgs() << "LV: Unrolling disabled by the pass manager\n"); } /// Return the loop vectorizer metadata prefix. @@ -878,7 +881,8 @@ struct LoopVectorize : public LoopPass { /// Pass identification, replacement for typeid static char ID; - explicit LoopVectorize() : LoopPass(ID) { + explicit LoopVectorize(bool NoUnrolling = false) + : LoopPass(ID), DisableUnrolling(NoUnrolling) { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } @@ -888,6 +892,7 @@ struct LoopVectorize : public LoopPass { TargetTransformInfo *TTI; DominatorTree *DT; TargetLibraryInfo *TLI; + bool DisableUnrolling; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -909,7 +914,7 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); - LoopVectorizeHints Hints(L); + LoopVectorizeHints Hints(L, DisableUnrolling); if (Hints.Width == 1 && Hints.Unroll == 1) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); @@ -4786,8 +4791,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { - Pass *createLoopVectorizePass() { - return new LoopVectorize(); + Pass *createLoopVectorizePass(bool NoUnrolling) { + return new LoopVectorize(NoUnrolling); } } -- cgit v1.1 From b70d79e7e93cbca738130a71c9431e104acd317b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 28 Aug 2013 22:17:26 +0000 Subject: Fix typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189524 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f3de6e2..8877b99 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -106,8 +106,8 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } // If we have a PHI node with a vector type that has only 2 uses: feed -// itself and be an operand of extractelemnt at a constant location, -// try to replace the PHI of the vector type with a PHI of a scalar type +// itself and be an operand of extractelement at a constant location, +// try to replace the PHI of the vector type with a PHI of a scalar type. Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. if (!PN->hasNUses(2)) -- cgit v1.1 From f1cd7983b195b2ab9c106c88b4da82983fe63f92 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 28 Aug 2013 23:40:29 +0000 Subject: Vectorizer/PassManager: I am working on moving the vectorizer out of the SCC passes. This patch moves the SLP-vectorizer and BB-vectorizer back into SCC passes for two reasons: 1. They are a kind of cannonicalization. 2. The performance measurements show that it is better to keep them in. There should be no functional change if you are not enabling the LateVectorization mode. 
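[Editor's note: to make the pass placement above concrete, here is a minimal sketch of how a client would request these vectorizers through PassManagerBuilder. The field and function names follow this tree's legacy builder interface, but the flag values and the helper itself are illustrative, not part of the patch.]

----
#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Builds a standard -O3 module pipeline; with this patch, SLP/BB
// vectorization runs inside the main (SCC-driven) scalar pipeline again.
static void buildPipeline(llvm::PassManager &MPM) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 3;
  PMB.SLPVectorize = true;  // vectorize parallel scalar chains
  PMB.BBVectorize = false;
  PMB.LoopVectorize = true; // placement still gated on LateVectorize
  PMB.populateModulePassManager(MPM);
}
----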
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189539 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 64 +++++++++---------------------- 1 file changed, 18 insertions(+), 46 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d4c0c2c..68d69a9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -216,22 +216,20 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (!LateVectorize) { - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass()); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); - } + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass()); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); } MPM.add(createAggressiveDCEPass()); // Delete dead instructions @@ -241,7 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // As an experimental mode, run any vectorization passes in a separate // pipeline from the CGSCC pass manager that runs iteratively with the // inliner. - if (LateVectorize) { + if (LateVectorize && LoopVectorize) { // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. @@ -249,35 +247,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Add the various vectorization passes and relevant cleanup passes for // them since we are no longer in the middle of the main scalar pipeline. - if (LoopVectorize) { - MPM.add(createLoopVectorizePass(DisableUnrollLoops)); - - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); // Unroll small loops - - // FIXME: Is this necessary/useful? Should we also do SimplifyCFG? - MPM.add(createInstructionCombiningPass()); - } - - if (SLPVectorize) { - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - // FIXME: Is this necessary/useful? Should we also do SimplifyCFG? - MPM.add(createInstructionCombiningPass()); - } - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass()); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. 
- if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); - } + MPM.add(createLoopVectorizePass(DisableUnrollLoops)); + MPM.add(createInstructionCombiningPass()); + MPM.add(createCFGSimplificationPass()); } if (!DisableUnitAtATime) { -- cgit v1.1 From 32f258b96a723b771eb44a2c0689b8bf4dd871ee Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 29 Aug 2013 03:29:57 +0000 Subject: Add getUnrollingPreferences to TTI Allow targets to customize the default behavior of the generic loop unrolling transformation. This will be used by the PowerPC backend when targeting the A2 core (which is in-order with a deep pipeline), and using more aggressive defaults is important. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189565 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80d060b..f8ff275 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -55,6 +55,8 @@ namespace { CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + UserAllowPartial = (P != -1) || + (UnrollAllowPartial.getNumOccurrences() > 0); initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -76,6 +78,7 @@ namespace { unsigned CurrentThreshold; bool CurrentAllowPartial; bool UserThreshold; // CurrentThreshold is user-specified. + bool UserAllowPartial; // CurrentAllowPartial is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -145,16 +148,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { << "] Loop %" << Header->getName() << "\n"); (void)Header; + TargetTransformInfo::UnrollingPreferences UP; + bool HasUP = TTI.getUnrollingPreferences(UP); + // Determine the current unrolling threshold. While this is normally set // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - unsigned Threshold = CurrentThreshold; + unsigned Threshold = (HasUP && !UserThreshold) ? UP.Threshold : + CurrentThreshold; if (!UserThreshold && Header->getParent()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) - Threshold = OptSizeUnrollThreshold; + Threshold = HasUP ? UP.OptSizeThreshold : OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available unsigned TripCount = 0; @@ -184,6 +191,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { Count = TripCount; } + bool Runtime = (HasUP && UnrollRuntime.getNumOccurrences() == 0) ? + UP.Runtime : UnrollRuntime; + // Enforce the threshold. if (Threshold != NoThreshold) { unsigned NumInlineCandidates; @@ -204,7 +214,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { + bool AllowPartial = (HasUP && !UserAllowPartial) ? 
UP.Partial : + CurrentAllowPartial; + if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; @@ -215,7 +227,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { while (Count != 0 && TripCount%Count != 0) Count--; } - else if (UnrollRuntime) { + else if (Runtime) { // Reduce unroll count to be a lower power-of-two value while (Count != 0 && Size > Threshold) { Count >>= 1; @@ -231,7 +243,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM)) return false; return true; -- cgit v1.1 From f208398528efde82bc49f48d0fef0587c1f192bb Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 29 Aug 2013 03:33:15 +0000 Subject: Revert: r189565 - Add getUnrollingPreferences to TTI Revert unintentional commit (of an unreviewed change). Original commit message: Add getUnrollingPreferences to TTI Allow targets to customize the default behavior of the generic loop unrolling transformation. This will be used by the PowerPC backend when targeting the A2 core (which is in-order with a deep pipeline), and using more aggressive defaults is important. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index f8ff275..80d060b 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -55,8 +55,6 @@ namespace { CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserAllowPartial = (P != -1) || - (UnrollAllowPartial.getNumOccurrences() > 0); initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -78,7 +76,6 @@ namespace { unsigned CurrentThreshold; bool CurrentAllowPartial; bool UserThreshold; // CurrentThreshold is user-specified. - bool UserAllowPartial; // CurrentAllowPartial is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -148,20 +145,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { << "] Loop %" << Header->getName() << "\n"); (void)Header; - TargetTransformInfo::UnrollingPreferences UP; - bool HasUP = TTI.getUnrollingPreferences(UP); - // Determine the current unrolling threshold. While this is normally set // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - unsigned Threshold = (HasUP && !UserThreshold) ? UP.Threshold : - CurrentThreshold; + unsigned Threshold = CurrentThreshold; if (!UserThreshold && Header->getParent()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) - Threshold = HasUP ? UP.OptSizeThreshold : OptSizeUnrollThreshold; + Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available unsigned TripCount = 0; @@ -191,9 +184,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { Count = TripCount; } - bool Runtime = (HasUP && UnrollRuntime.getNumOccurrences() == 0) ? - UP.Runtime : UnrollRuntime; - // Enforce the threshold. 
if (Threshold != NoThreshold) { unsigned NumInlineCandidates; @@ -214,9 +204,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - bool AllowPartial = (HasUP && !UserAllowPartial) ? UP.Partial : - CurrentAllowPartial; - if (!AllowPartial && !(Runtime && TripCount == 0)) { + if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; @@ -227,7 +215,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { while (Count != 0 && TripCount%Count != 0) Count--; } - else if (Runtime) { + else if (UnrollRuntime) { // Reduce unroll count to be a lower power-of-two value while (Count != 0 && Size > Threshold) { Count >>= 1; @@ -243,7 +231,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) return false; return true; -- cgit v1.1 From 6e4d93b2325b268d5394b82201c1a782cca78fae Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 30 Aug 2013 00:48:37 +0000 Subject: Random cleanup: No need to use a std::vector here, since createInternalizePass uses an ArrayRef. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189632 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 68d69a9..395e323 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -276,11 +276,8 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // Now that composite has been compiled, scan through the module, looking // for a main function. If main is defined, mark all other functions // internal. - if (Internalize) { - std::vector E; - E.push_back("main"); - PM.add(createInternalizePass(E)); - } + if (Internalize) + PM.add(createInternalizePass("main")); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function @@ -321,6 +318,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); PM.add(createJumpThreadingPass()); + // Break up allocas if (UseNewSROA) PM.add(createSROAPass()); @@ -334,6 +332,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createLICMPass()); // Hoist loop invariants. PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. + // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); -- cgit v1.1 From a8517ee7329636e3b2862784f1abf1f8d892fee7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 30 Aug 2013 14:35:35 +0000 Subject: InstCombine: Check for zero shift amounts before subtracting one causing integer overflow. PR17026. Also avoid undefined shifts and shift amounts larger than 64 bits (those are always undef because we can't represent integer types that large). 
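[Editor's note: a standalone sketch of the unsigned wraparound being fixed here, not taken from the patch. lowBits() merely stands in for APInt::getBitsSet, and the zero shift amount matches the PR17026 scenario.]

----
#include <cstdint>
#include <cstdio>

// Mimics APInt::getBitsSet(BitWidth, 0, HiBit): bits [0, HiBit) set.
static uint64_t lowBits(unsigned HiBit) {
  return HiBit >= 64 ? ~UINT64_C(0) : (UINT64_C(1) << HiBit) - 1;
}

int main() {
  unsigned ShlAmt = 0;         // a shift by zero, as in PR17026
  unsigned HiBit = ShlAmt - 1; // wraps to 4294967295: the overflow
  std::printf("requested high bit: %u\n", HiBit);
  if (ShlAmt == 0 || ShlAmt >= 64)
    return 0; // the fix: bail out (noop / undef) before any arithmetic
  std::printf("mask: 0x%llx\n", (unsigned long long)lowBits(ShlAmt - 1));
  return 0;
}
----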
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189672 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a7bfe09..a2492d8 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -845,21 +845,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { - unsigned ShlAmt = cast(Shl->getOperand(1))->getZExtValue(); - unsigned ShrAmt = cast(Shr->getOperand(1))->getZExtValue(); + const APInt &ShlOp1 = cast(Shl->getOperand(1))->getValue(); + const APInt &ShrOp1 = cast(Shr->getOperand(1))->getValue(); + if (!ShlOp1 || !ShrOp1) + return 0; // Noop. + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + unsigned BitWidth = Ty->getIntegerBitWidth(); + if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) + return 0; // Undef. + + unsigned ShlAmt = ShlOp1.getZExtValue(); + unsigned ShrAmt = ShrOp1.getZExtValue(); KnownOne.clearAllBits(); KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); KnownZero &= DemandedMask; - if (ShlAmt == 0 || ShrAmt == 0) - return 0; - - Value *VarX = Shr->getOperand(0); - Type *Ty = VarX->getType(); - - APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); - APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); + APInt BitMask1(APInt::getAllOnesValue(BitWidth)); + APInt BitMask2(APInt::getAllOnesValue(BitWidth)); bool isLshr = (Shr->getOpcode() == Instruction::LShr); BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : -- cgit v1.1 From 86d49563a65b3990d8ea7dac62d9222c1fd3b1cf Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 30 Aug 2013 21:07:33 +0000 Subject: Compulsive reformatting. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189697 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 26743dc..29d1a89 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -141,8 +141,11 @@ bool InternalizePass::runOnModule(Module &M) { !I->hasLocalLinkage() && // Can't already have internal linkage !ExternalNames.count(I->getName())) {// Not marked to keep external? I->setLinkage(GlobalValue::InternalLinkage); - // Remove a callgraph edge from the external node to this function. - if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + + if (ExternalNode) + // Remove a callgraph edge from the external node to this function. + ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + Changed = true; ++NumFunctions; DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); -- cgit v1.1 From bc870037f66cc2e9096241c573bf1b7211fccf7b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 31 Aug 2013 18:19:35 +0000 Subject: SimplifyLibCalls: When emitting an overloaded fp function check that it's available. The existing code missed some edge cases when e.g. we're going to emit sqrtf but only the availability of sqrt was checked. 
This happens on odd platforms like windows. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189724 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83636fb..4e7055b 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -118,6 +118,21 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { return false; } +/// \brief Check whether the overloaded unary floating point function +/// corresponing to \a Ty is available. +static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc::Func DoubleFn, LibFunc::Func FloatFn, + LibFunc::Func LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + //===----------------------------------------------------------------------===// // Fortified Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1137,7 +1152,9 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op1C->isExactlyValue(1.0)) return Op1C; // pow(2.0, x) -> exp2(x) - if (Op1C->isExactlyValue(2.0) && TLI->has(LibFunc::exp2)) + if (Op1C->isExactlyValue(2.0) && + hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, + LibFunc::exp2l)) return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); } @@ -1148,7 +1165,10 @@ struct PowOpt : public UnsafeFPLibCallOptimization { return ConstantFP::get(CI->getType(), 1.0); if (Op2C->isExactlyValue(0.5) && - TLI->has(LibFunc::sqrt) && TLI->has(LibFunc::fabs)) { + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf, + LibFunc::sqrtl) && + hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, + LibFunc::fabsl)) { // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. @@ -1181,7 +1201,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { Value *Ret = NULL; if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2)) { + TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); } -- cgit v1.1 From 6a9b29ec9b42e792732659e510a655449a41b661 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 3 Sep 2013 10:04:11 +0000 Subject: [msan] Fix select instrumentation. Select condition shadow was being ignored resulting in false negatives. This change OR-s sign-extended condition shadow into the result shadow. 
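[Editor's note: a minimal reproducer of the false negative described above, assuming default msan instrumentation; it is illustrative and not part of the patch.]

----
#include <cstdio>

int main() {
  int Cond;         // uninitialized: msan assigns it an all-ones shadow
  int A = 1, B = 2; // fully initialized: zero shadow
  int R = Cond ? A : B;
  // Pre-fix instrumentation: shadow(R) = select(Cond, Sa, Sb) = 0, so this
  // use of R went unreported -- the false negative. Post-fix:
  // shadow(R) = sext(shadow(Cond)) | select(Cond, Sa, Sb) = all-ones.
  std::printf("%d\n", R);
  return 0;
}
----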
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189785 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ae73a2e..f2cf7a7 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1743,9 +1743,12 @@ struct MemorySanitizerVisitor : public InstVisitor { void visitSelectInst(SelectInst& I) { IRBuilder<> IRB(&I); - setShadow(&I, IRB.CreateSelect(I.getCondition(), - getShadow(I.getTrueValue()), getShadow(I.getFalseValue()), - "_msprop")); + // a = select b, c, d + // Sa = (sext Sb) | (select b, Sc, Sd) + Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()), + getShadow(I.getFalseValue())); + Value *S2 = IRB.CreateSExt(getShadow(I.getCondition()), S->getType()); + setShadow(&I, IRB.CreateOr(S, S2, "_msprop")); if (MS.TrackOrigins) { // Origins are always i32, so any vector conditions must be flattened. // FIXME: consider tracking vector origins for app vectors? -- cgit v1.1 From 69086b2962b16a9e78aea0605202c5ea126049ae Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 3 Sep 2013 13:05:29 +0000 Subject: [msan] Fix handling of select with struct arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189796 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f2cf7a7..e9b78ac 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1744,11 +1744,22 @@ struct MemorySanitizerVisitor : public InstVisitor { void visitSelectInst(SelectInst& I) { IRBuilder<> IRB(&I); // a = select b, c, d - // Sa = (sext Sb) | (select b, Sc, Sd) Value *S = IRB.CreateSelect(I.getCondition(), getShadow(I.getTrueValue()), getShadow(I.getFalseValue())); - Value *S2 = IRB.CreateSExt(getShadow(I.getCondition()), S->getType()); - setShadow(&I, IRB.CreateOr(S, S2, "_msprop")); + if (I.getType()->isAggregateType()) { + // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do + // an extra "select". This results in much more compact IR. + // Sa = select Sb, poisoned, (select b, Sc, Sd) + S = IRB.CreateSelect(getShadow(I.getCondition()), + getPoisonedShadow(getShadowTy(I.getType())), S, + "_msprop_select_agg"); + } else { + // Sa = (sext Sb) | (select b, Sc, Sd) + S = IRB.CreateOr( + S, IRB.CreateSExt(getShadow(I.getCondition()), S->getType()), + "_msprop_select"); + } + setShadow(&I, S); if (MS.TrackOrigins) { // Origins are always i32, so any vector conditions must be flattened. // FIXME: consider tracking vector origins for app vectors? -- cgit v1.1 From 89008539a322b9ce1d66837b3342d0be312bcbba Mon Sep 17 00:00:00 2001 From: Yi Jiang Date: Tue, 3 Sep 2013 17:26:04 +0000 Subject: In this patch we are trying to do two things: 1) If the width of vectorization list candidate is bigger than vector reg width, we will break it down to fit the vector reg. 2) We do not vectorize the width which is not power of two. The performance result shows it will help some spec benchmarks. mesa improved 6.97% and ammp improved 1.54%. 
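[Editor's note: a worked example of the two rules above, assuming a 128-bit vector register and 32-bit scalars (so VF = 4). The loop mirrors the new chunking in tryToVectorizeList but is illustrative, not the patch itself.]

----
#include <cstdio>

static bool isPow2(unsigned X) { return X && !(X & (X - 1)); }

int main() {
  const unsigned MinVecRegSize = 128, ScalarBits = 32;
  const unsigned VF = MinVecRegSize / ScalarBits; // 4 lanes per register
  const unsigned E = 9;                           // nine candidate scalars
  for (unsigned I = 0; I < E; ++I) {
    unsigned OpsWidth = (I + VF > E) ? E - I : VF;
    if (!isPow2(OpsWidth) || OpsWidth < 2)
      break; // rule 2: the odd trailing scalar stays scalar
    std::printf("cost-model bundle [%u, %u)\n", I, I + OpsWidth);
    I += VF - 1; // pretend the bundle was profitable; skip past it
  }
  // Prints [0,4) and [4,8): rule 1 splits the 9-wide list to fit the
  // register, and the lone ninth element is never given a 1-wide "vector".
  return 0;
}
----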
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189830 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 47 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 57cd2a7..1f288bc 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1781,28 +1781,53 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { // Check that all of the parts are scalar instructions of the same type. Instruction *I0 = dyn_cast(VL[0]); if (!I0) - return 0; + return false; unsigned Opcode0 = I0->getOpcode(); + + Type *Ty0 = I0->getType(); + unsigned Sz = DL->getTypeSizeInBits(Ty0); + unsigned VF = MinVecRegSize / Sz; for (int i = 0, e = VL.size(); i < e; ++i) { Type *Ty = VL[i]->getType(); if (Ty->isAggregateType() || Ty->isVectorTy()) - return 0; + return false; Instruction *Inst = dyn_cast(VL[i]); if (!Inst || Inst->getOpcode() != Opcode0) - return 0; + return false; } - R.buildTree(VL); - int Cost = R.getTreeCost(); - - if (Cost >= -SLPCostThreshold) - return false; + bool Changed = false; + + for (unsigned i = 0, e = VL.size(); i < e; ++i) { + unsigned OpsWidth = 0; + + if (i + VF > e) + OpsWidth = e - i; + else + OpsWidth = VF; + + if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) + break; - DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n"); - R.vectorizeTree(); - return true; + DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); + ArrayRef Ops = VL.slice(i, OpsWidth); + + R.buildTree(Ops); + int Cost = R.getTreeCost(); + + if (Cost < -SLPCostThreshold) { + DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n"); + R.vectorizeTree(); + + // Move to the next bundle. + i += VF - 1; + Changed = true; + } + } + + return Changed; } bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { -- cgit v1.1 From aa31d35dc7e47360f7f2b6c4af47b3024bf7e4da Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Sep 2013 21:05:15 +0000 Subject: Use type form of getIntPtrType in alloca visitor. This doesn't actually matter, since alloca is always 0 address space, but this is more consistent. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 58c61e7d..4c382a3 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -154,7 +154,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. if (TD) { - Type *IntPtrTy = TD->getIntPtrType(AI.getContext()); + Type *IntPtrTy = TD->getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); @@ -181,7 +181,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // insert our getelementptr instruction... // Type *IdxTy = TD - ? TD->getIntPtrType(AI.getContext()) + ? 
TD->getIntPtrType(AI.getType()) : Type::getInt64Ty(AI.getContext()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = { NullIdx, NullIdx }; -- cgit v1.1 From 330943afb7ac076ff6b347a0151dd54ec90d54e2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 3 Sep 2013 21:05:48 +0000 Subject: Teach InstCombineLoadCast about address spaces. This is another one that doesn't matter much, but uses the right GEP index types in the first place. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189854 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4c382a3..88e16e9 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -304,8 +304,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast(CastOp)) if (ASrcTy->getNumElements() != 0) { Type *IdxTy = TD - ? TD->getIntPtrType(LI.getContext()) - : Type::getInt64Ty(LI.getContext()); + ? TD->getIntPtrType(SrcTy) + : Type::getInt64Ty(SrcTy->getContext()); Value *Idx = Constant::getNullValue(IdxTy); Value *Idxs[2] = { Idx, Idx }; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); -- cgit v1.1 From 7b15c0afc1403c451e32d6ae4ecc43a83af496be Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 3 Sep 2013 21:33:17 +0000 Subject: Enable late-vectorization by default. This patch changes the default setting for the LateVectorization flag that controls where the loop-vectorizer is ran. Perf gains: SingleSource/Benchmarks/Shootout/matrix -37.33% MultiSource/Benchmarks/PAQ8p/paq8p -22.83% SingleSource/Benchmarks/Linpack/linpack-pc -16.22% SingleSource/Benchmarks/Shootout-C++/ary3 -15.16% MultiSource/Benchmarks/TSVC/NodeSplitting-flt/NodeSplitting-flt -10.34% MultiSource/Benchmarks/TSVC/NodeSplitting-dbl/NodeSplitting-dbl -7.12% Regressions: SingleSource/Benchmarks/Misc/lowercase 15.10% MultiSource/Benchmarks/TSVC/Equivalencing-flt/Equivalencing-flt 13.18% SingleSource/Benchmarks/Shootout-C++/matrix 8.27% SingleSource/Benchmarks/CoyoteBench/lpbench 7.30% git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189858 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 395e323..2008c5d 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -33,7 +33,7 @@ RunLoopVectorization("vectorize-loops", cl::desc("Run the Loop vectorization passes")); static cl::opt -LateVectorization("late-vectorize", cl::init(false), cl::Hidden, +LateVectorization("late-vectorize", cl::init(true), cl::Hidden, cl::desc("Run the vectorization pasess late in the pass " "pipeline (after the inliner)")); -- cgit v1.1 From 72196ab082c1f2132d209e61d93d084d55fb6c18 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 3 Sep 2013 22:40:54 +0000 Subject: [objc-arc] Turn off the objc_retainBlock -> objc_retain optimization. The reason that I am turning off this optimization is that there is an additional case where a block can escape that has come up. 
Specifically, this occurs when a block is used in a scope outside of its current scope. This can cause a captured retainable object pointer whose life is preserved by the objc_retainBlock to be deallocated before the block is invoked. An example of the code needed to trigger the bug is: ---- \#import int main(int argc, const char * argv[]) { void (^somethingToDoLater)(); { NSObject *obj = [NSObject new]; somethingToDoLater = ^{ [obj self]; // Crashes here }; } NSLog(@"test."); somethingToDoLater(); return 0; } ---- In the next commit, I remove all the dead code that results from this. Once I put in the fixing commit I will bring back the tests that I deleted in this commit. rdar://14802782. rdar://14868830. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189869 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 0385de5..b774166 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -1510,11 +1510,6 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } break; } - case IC_RetainBlock: - // If we strength reduce an objc_retainBlock to an objc_retain, continue - // onto the objc_retain peephole optimizations. Otherwise break. - OptimizeRetainBlockCall(F, Inst, Class); - break; case IC_RetainRV: if (OptimizeRetainRVCall(F, Inst)) continue; -- cgit v1.1 From be0857051fee0a782e0a2d249b37db166cd28e1c Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 3 Sep 2013 22:40:56 +0000 Subject: [objc-arc] Remove dead code from previous commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189870 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 140 --------------------------------- 1 file changed, 140 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index b774166..b419a7e 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -176,91 +176,6 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return 0; } -/// \brief Test whether the given retainable object pointer escapes. -/// -/// This differs from regular escape analysis in that a use as an -/// argument to a call is not considered an escape. -/// -static bool DoesRetainableObjPtrEscape(const User *Ptr) { - DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n"); - - // Walk the def-use chains. - SmallVector Worklist; - Worklist.push_back(Ptr); - // If Ptr has any operands add them as well. - for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E; - ++I) { - Worklist.push_back(*I); - } - - // Ensure we do not visit any value twice. - SmallPtrSet VisitedSet; - - do { - const Value *V = Worklist.pop_back_val(); - - DEBUG(dbgs() << "Visiting: " << *V << "\n"); - - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const User *UUser = *UI; - - DEBUG(dbgs() << "User: " << *UUser << "\n"); - - // Special - Use by a call (callee or argument) is not considered - // to be an escape. - switch (GetBasicInstructionClass(UUser)) { - case IC_StoreWeak: - case IC_InitWeak: - case IC_StoreStrong: - case IC_Autorelease: - case IC_AutoreleaseRV: { - DEBUG(dbgs() << "User copies pointer arguments. 
Pointer Escapes!\n"); - // These special functions make copies of their pointer arguments. - return true; - } - case IC_IntrinsicUser: - // Use by the use intrinsic is not an escape. - continue; - case IC_User: - case IC_None: - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa(UUser) || isa(UUser) || - isa(UUser) || isa(UUser)) { - - if (VisitedSet.insert(UUser)) { - DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes." - " Adding to list.\n"); - Worklist.push_back(UUser); - } else { - DEBUG(dbgs() << "Already visited node.\n"); - } - continue; - } - // Use by a load is not an escape. - if (isa(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast(UUser)) - if (V != SI->getValueOperand()) - continue; - break; - default: - // Regular calls and other stuff are not considered escapes. - continue; - } - // Otherwise, conservatively assume an escape. - DEBUG(dbgs() << "Assuming ptr escapes.\n"); - return true; - } - } while (!Worklist.empty()); - - // No escapes found. - DEBUG(dbgs() << "Ptr does not escape.\n"); - return false; -} - /// This is a wrapper around getUnderlyingObjCPtr along the lines of /// GetUnderlyingObjects except that it returns early when it sees the first /// alloca. @@ -1188,13 +1103,9 @@ namespace { unsigned ARCAnnotationProvenanceSourceMDKind; #endif // ARC_ANNOATIONS - bool IsRetainBlockOptimizable(const Instruction *Inst); - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); - bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock, - InstructionClass &Class); void OptimizeIndividualCalls(Function &F); void CheckForCFGHazards(const BasicBlock *BB, @@ -1283,22 +1194,6 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } -bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { - // Without the magic metadata tag, we have to assume this might be an - // objc_retainBlock call inserted to convert a block pointer to an id, - // in which case it really is needed. - if (!Inst->getMetadata(CopyOnEscapeMDKind)) - return false; - - // If the pointer "escapes" (not including being used in a call), - // the copy may be needed. - if (DoesRetainableObjPtrEscape(Inst)) - return false; - - // Otherwise, it's not needed. - return true; -} - /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is /// not a return value. Or, if it can be paired with an /// objc_autoreleaseReturnValue, delete the pair and return true. @@ -1399,41 +1294,6 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, } -// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain -// calls. -// -// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and -// does not escape (following the rules of block escaping), strength reduce the -// objc_retainBlock to an objc_retain. -// -// TODO: If an objc_retainBlock call is dominated period by a previous -// objc_retainBlock call, strength reduce the objc_retainBlock to an -// objc_retain. -bool -ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, - InstructionClass &Class) { - assert(GetBasicInstructionClass(Inst) == Class); - assert(IC_RetainBlock == Class); - - // If we can not optimize Inst, return false. 
- if (!IsRetainBlockOptimizable(Inst)) - return false; - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n"); - DEBUG(dbgs() << "Old: " << *Inst << "\n"); - CallInst *RetainBlock = cast(Inst); - Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain); - RetainBlock->setCalledFunction(NewDecl); - // Remove copy_on_escape metadata. - RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); - Class = IC_Retain; - DEBUG(dbgs() << "New: " << *Inst << "\n"); - return true; -} - /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { -- cgit v1.1 From 79869ee59abc3aeebda6e15540273a7e5936adb4 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 3 Sep 2013 23:34:36 +0000 Subject: Add r159136 back now that pr13124 has been fixed. Original message: If a constant or a function has linkonce_odr linkage and unnamed_addr, mark hidden. Being linkonce_odr guarantees that it is available in every dso that needs it. Being a constant/function with unnamed_addr guarantees that the copies don't have to be merged. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189886 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 64cd515..ab9d30d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1914,6 +1914,12 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, return true; } + if (GV->hasLinkOnceODRLinkage() && GV->hasUnnamedAddr() && GV->isConstant() && + GV->getVisibility() != GlobalValue::HiddenVisibility) { + GV->setVisibility(GlobalValue::HiddenVisibility); + return true; + } + if (!GV->hasLocalLinkage()) return false; @@ -1926,6 +1932,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (!GS.isCompared && !GV->hasUnnamedAddr()) { GV->setUnnamedAddr(true); NumUnnamed++; + return true; } if (GV->isConstant() || !GV->hasInitializer()) @@ -2105,6 +2112,10 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { F->eraseFromParent(); Changed = true; ++NumFnDeleted; + } else if (F->hasLinkOnceODRLinkage() && F->hasUnnamedAddr() && + F->getVisibility() != GlobalValue::HiddenVisibility) { + F->setVisibility(GlobalValue::HiddenVisibility); + Changed = true; } else if (F->hasLocalLinkage()) { if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && !F->hasAddressTaken()) { -- cgit v1.1 From 7bfabdac4ebf82f9f6a9ee7a00fd948f729dc7fe Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 4 Sep 2013 11:57:13 +0000 Subject: InstCombine: look for masked compares with subset relation Even in cases which aren't universally optimisable like "(A & B) != 0 && (A & C) != 0", the masks can make one of the comparisons completely redundant. In this case, since we've gone to the effort of spotting masked comparisons we should combine them. 
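
To make the subset rule concrete, here is a minimal standalone check (editorial sketch, not code from the patch; the masks 0x04 and 0x0C are illustrative values). When B & D == B, (A & B) != 0 already implies (A & D) != 0, so the conjunction collapses to the compare with the tighter mask:

----
// Exhaustive check of the subset rule behind the fold:
// with B a subset of D (B & D == B),
//   ((A & B) != 0) && ((A & D) != 0)  ==  ((A & B) != 0)
#include <cassert>

int main() {
  const unsigned B = 0x04, D = 0x0C; // example masks, B & D == B
  for (unsigned A = 0; A < 256; ++A) {
    bool both = ((A & B) != 0) && ((A & D) != 0);
    bool tighter = (A & B) != 0;
    assert(both == tighter);
  }
  return 0;
}
----
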
rdar://problem/7625728 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 86 +++++++++++++++++++--- 1 file changed, 75 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index d40385c..099a780 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -488,6 +488,26 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, return result; } +/// Convert an analysis of a masked ICmp into its equivalent if all boolean +/// operations had the opposite sense. Since each "NotXXX" flag (recording !=) +/// is adjacent to the corresponding normal flag (recording ==), this just +/// involves swapping those bits over. +static unsigned conjugateICmpMask(unsigned Mask) { + unsigned NewMask; + NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes | + FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed | + FoldMskICmp_BMask_Mixed)) + << 1; + + NewMask |= + (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes | + FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed | + FoldMskICmp_BMask_NotMixed)) + >> 1; + + return NewMask; +} + /// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) /// if possible. The returned predicate is either == or !=. Returns false if /// decomposition fails. @@ -618,8 +638,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, /// foldLogOpOfMaskedICmps: /// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y) -static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, - ICmpInst::Predicate NEWCC, +static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, llvm::InstCombiner::BuilderTy* Builder) { Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); @@ -629,8 +648,24 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); - if (NEWCC == ICmpInst::ICMP_NE) - mask >>= 1; // treat "Not"-states as normal states + // In full generality: + // (icmp (A & B) Op C) | (icmp (A & D) Op E) + // == ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ] + // + // If the latter can be converted into (icmp (A & X) Op Y) then the former is + // equivalent to (icmp (A & X) !Op Y). + // + // Therefore, we can pretend for the rest of this function that we're dealing + // with the conjunction, provided we flip the sense of any comparisons (both + // input and output). + + // In most cases we're going to produce an EQ for the "&&" case. + ICmpInst::Predicate NEWCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + if (!IsAnd) { + // Convert the masking analysis into its equivalent with negated + // comparisons. + mask = conjugateICmpMask(mask); + } if (mask & FoldMskICmp_Mask_AllZeroes) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) @@ -657,6 +692,40 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, Value* newAnd = Builder->CreateAnd(A, newAnd1); return Builder->CreateICmp(NEWCC, newAnd, A); } + + // Remaining cases assume at least that B and D are constant, and depend on + // their actual values. 
This isn't strictly, necessary, just a "handle the + // easy cases for now" decision. + ConstantInt *BCst = dyn_cast(B); + if (BCst == 0) return 0; + ConstantInt *DCst = dyn_cast(D); + if (DCst == 0) return 0; + + if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { + // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0) + // Only valid if one of the masks is a superset of the other (check "B&D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() & DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } + if (mask & FoldMskICmp_AMask_NotAllOnes) { + // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // -> (icmp ne (A & B), A) or (icmp ne (A & D), A) + // Only valid if one of the masks is a superset of the other (check "B|D" is + // the same as either B or D). + APInt NewMask = BCst->getValue() | DCst->getValue(); + + if (NewMask == BCst->getValue()) + return LHS; + else if (NewMask == DCst->getValue()) + return RHS; + } if (mask & FoldMskICmp_BMask_Mixed) { // (icmp eq (A & B), C) & (icmp eq (A & D), E) // We already know that B & C == C && D & E == E. @@ -665,14 +734,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, // contradict, then we can transform to // -> (icmp eq (A & (B|D)), (C|E)) // Currently, we only handle the case of B, C, D, and E being constant. - ConstantInt *BCst = dyn_cast(B); - if (BCst == 0) return 0; - ConstantInt *DCst = dyn_cast(D); - if (DCst == 0) return 0; // we can't simply use C and E, because we might actually handle // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set - ConstantInt *CCst = dyn_cast(C); if (CCst == 0) return 0; if (LHSCC != NEWCC) @@ -715,7 +779,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) return V; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). @@ -1479,7 +1543,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // handle (roughly): // (icmp ne (A & B), C) | (icmp ne (A & D), E) - if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_NE, Builder)) + if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder)) return V; Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); -- cgit v1.1 From 0415b1810bbf93f434f1c561e172bf24c1cb37dc Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 4 Sep 2013 11:57:17 +0000 Subject: InstCombine: allow unmasked icmps to be combined with logical ops "(icmp op i8 A, B)" is equivalent to "(icmp op i8 (A & 0xff), B)" as a degenerate case. Allowing this as a "masked" comparison when analysing "(icmp) &/| (icmp)" allows us to combine them in more cases. 
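
A minimal standalone check of the degenerate case (editorial sketch with illustrative constants, not code from the patch): treating a == 5 as the trivially masked (a & 0xFF) == 5 lets it combine with a genuinely masked compare, so the conjunction folds to the unmasked compare alone:

----
// Exhaustive check: (a == 5) && ((a & 1) == 1) folds to (a == 5),
// because a == 5 is (a & 0xFF) == 5 and the two masked facts agree.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t a = static_cast<uint8_t>(v);
    bool original = (a == 5) && ((a & 1) == 1);
    bool folded = (a == 5);
    assert(original == folded);
  }
  return 0;
}
----
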
rdar://problem/7625728 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189931 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 38 +++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 099a780..98a7b2d 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -568,14 +568,22 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, L21 = L22 = L1 = 0; } else { // Look for ANDs in the LHS icmp. - if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { - if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) - L21 = L22 = 0; - } else { - if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) - return 0; - std::swap(L1, L2); + if (!L1->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. + L11 = L12 = 0; + } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { + // Any icmp can be viewed as being trivially masked; if it allows us to + // remove one, it's worth it. + L11 = L1; + L12 = Constant::getAllOnesValue(L1->getType()); + } + + if (!L2->getType()->isIntegerTy()) { + // You can icmp pointers, for example. They really aren't masks. L21 = L22 = 0; + } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { + L21 = L2; + L22 = Constant::getAllOnesValue(L2->getType()); } } @@ -596,7 +604,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, return 0; } E = R2; R1 = 0; ok = true; - } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + } else if (R1->getType()->isIntegerTy()) { + if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { + // As before, model no mask as a trivial mask if it'll let us do an + // optimisation. + R11 = R1; + R12 = Constant::getAllOnesValue(R1->getType()); + } + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { A = R11; D = R12; E = R2; ok = true; } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { @@ -609,7 +624,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, return 0; // Look for ANDs in on the right side of the RHS icmp. - if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { + if (!ok && R2->getType()->isIntegerTy()) { + if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) { + R11 = R2; + R12 = Constant::getAllOnesValue(R2->getType()); + } + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { A = R11; D = R12; E = R1; ok = true; } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { -- cgit v1.1 From 9718158222ad9c52b2fb14609a341d4e24def8bb Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 4 Sep 2013 16:09:01 +0000 Subject: Revert "Add r159136 back now that pr13124 has been fixed." This reverts commit r189886. I found a corner case where this optimization is not valid: Say we have a "linkonce_odr unnamed_addr" in two translation units: * In TU 1 this optimization kicks in and makes it hidden. * In TU 2 it gets const merged with a constant that is *not* unnamed_addr, resulting in a non unnamed_addr constant with default visibility. * The static linker rules for combining visibility them produce a hidden symbol, which is incorrect from the point of view of the non unnamed_addr constant. The one place we can do this is when we know that the symbol is not used from another TU in the same shared object, i.e., during LTO. I will move it there. 
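
A toy model of the visibility-combination hazard described above (this assumes a simplified rule in which the static linker keeps the most restrictive visibility when combining definitions; the enum and combine function are illustrative, not a real linker API):

----
// TU1 hid its copy under the reverted optimization; TU2's copy is not
// unnamed_addr and still needs default visibility.
#include <algorithm>
#include <cassert>

enum Visibility { Default = 0, Hidden = 1 }; // larger == more restrictive

static Visibility combine(Visibility A, Visibility B) {
  return std::max(A, B); // keep the most restrictive visibility
}

int main() {
  // The combined symbol ends up hidden, violating TU2's requirement.
  assert(combine(Hidden, Default) == Hidden);
  return 0;
}
----
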
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189954 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ab9d30d..64cd515 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1914,12 +1914,6 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, return true; } - if (GV->hasLinkOnceODRLinkage() && GV->hasUnnamedAddr() && GV->isConstant() && - GV->getVisibility() != GlobalValue::HiddenVisibility) { - GV->setVisibility(GlobalValue::HiddenVisibility); - return true; - } - if (!GV->hasLocalLinkage()) return false; @@ -1932,7 +1926,6 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (!GS.isCompared && !GV->hasUnnamedAddr()) { GV->setUnnamedAddr(true); NumUnnamed++; - return true; } if (GV->isConstant() || !GV->hasInitializer()) @@ -2112,10 +2105,6 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { F->eraseFromParent(); Changed = true; ++NumFnDeleted; - } else if (F->hasLinkOnceODRLinkage() && F->hasUnnamedAddr() && - F->getVisibility() != GlobalValue::HiddenVisibility) { - F->setVisibility(GlobalValue::HiddenVisibility); - Changed = true; } else if (F->hasLocalLinkage()) { if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && !F->hasAddressTaken()) { -- cgit v1.1 From 8b08904e6c7fae76a1df500c154671dcb4d00b10 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 4 Sep 2013 18:16:02 +0000 Subject: Remove dead code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189967 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 29d1a89..12d6ea1 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -52,8 +52,6 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef exportList); void LoadFile(const char *Filename); - void ClearExportList(); - void AddToExportList(const std::string &val); virtual bool runOnModule(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -101,14 +99,6 @@ void InternalizePass::LoadFile(const char *Filename) { } } -void InternalizePass::ClearExportList() { - ExternalNames.clear(); -} - -void InternalizePass::AddToExportList(const std::string &val) { - ExternalNames.insert(val); -} - bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; -- cgit v1.1 From 0fb771667eb2f8a5cd623ce0ee3775b4d1416576 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 4 Sep 2013 18:37:36 +0000 Subject: Refactor duplicated logic to a helper function. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189969 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 89 +++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 36 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 12d6ea1..cd20a1d 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -99,6 +99,27 @@ void InternalizePass::LoadFile(const char *Filename) { } } +static bool shouldInternalize(const GlobalValue &GV, + const std::set &ExternalNames) { + // Function must be defined here + if (GV.isDeclaration()) + return false; + + // Available externally is really just a "declaration with a body". + if (GV.hasAvailableExternallyLinkage()) + return false; + + // Already has internal linkage + if (GV.hasLocalLinkage()) + return false; + + // Marked to keep external? + if (ExternalNames.count(GV.getName())) + return false; + + return true; +} + bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; @@ -124,22 +145,20 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all functions not in the api as internal. // FIXME: maybe use private linkage? - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && // Function must be defined here - // Available externally is really just a "declaration with a body". - !I->hasAvailableExternallyLinkage() && - !I->hasLocalLinkage() && // Can't already have internal linkage - !ExternalNames.count(I->getName())) {// Not marked to keep external? - I->setLinkage(GlobalValue::InternalLinkage); - - if (ExternalNode) - // Remove a callgraph edge from the external node to this function. - ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); - - Changed = true; - ++NumFunctions; - DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); - } + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!shouldInternalize(*I, ExternalNames)) + continue; + + I->setLinkage(GlobalValue::InternalLinkage); + + if (ExternalNode) + // Remove a callgraph edge from the external node to this function. + ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + + Changed = true; + ++NumFunctions; + DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); + } // Never internalize the llvm.used symbol. It is used to implement // attribute((used)). @@ -163,29 +182,27 @@ bool InternalizePass::runOnModule(Module &M) { // internal as well. // FIXME: maybe use private linkage? for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration() && !I->hasLocalLinkage() && - // Available externally is really just a "declaration with a body". - !I->hasAvailableExternallyLinkage() && - !ExternalNames.count(I->getName())) { - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; - ++NumGlobals; - DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); - } + I != E; ++I) { + if (!shouldInternalize(*I, ExternalNames)) + continue; + + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumGlobals; + DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); + } // Mark all aliases that are not in the api as internal as well. 
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) - if (!I->isDeclaration() && !I->hasInternalLinkage() && - // Available externally is really just a "declaration with a body". - !I->hasAvailableExternallyLinkage() && - !ExternalNames.count(I->getName())) { - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; - ++NumAliases; - DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); - } + I != E; ++I) { + if (!shouldInternalize(*I, ExternalNames)) + continue; + + I->setLinkage(GlobalValue::InternalLinkage); + Changed = true; + ++NumAliases; + DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); + } return Changed; } -- cgit v1.1 From 1d7df349ab7fa051ffe88589b06a1aa514f84a00 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 4 Sep 2013 18:53:21 +0000 Subject: Small simplification given that insert of an empty range is a nop. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189971 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index cd20a1d..ff56654 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -70,8 +70,7 @@ InternalizePass::InternalizePass() initializeInternalizePassPass(*PassRegistry::getPassRegistry()); if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); - if (!APIList.empty()) // If a list is specified, use it as well. - ExternalNames.insert(APIList.begin(), APIList.end()); + ExternalNames.insert(APIList.begin(), APIList.end()); } InternalizePass::InternalizePass(ArrayRef exportList) -- cgit v1.1 From 775079c227083be3fe22f6ae071d5b74a7ade745 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 4 Sep 2013 20:08:46 +0000 Subject: Rename some variables to match the style guide. I am about to patch this code, and this makes the diff far more readable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189982 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/Internalize.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index ff56654..f2feacc 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -50,7 +50,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit InternalizePass(); - explicit InternalizePass(ArrayRef exportList); + explicit InternalizePass(ArrayRef ExportList); void LoadFile(const char *Filename); virtual bool runOnModule(Module &M); @@ -73,11 +73,11 @@ InternalizePass::InternalizePass() ExternalNames.insert(APIList.begin(), APIList.end()); } -InternalizePass::InternalizePass(ArrayRef exportList) +InternalizePass::InternalizePass(ArrayRef ExportList) : ModulePass(ID){ initializeInternalizePassPass(*PassRegistry::getPassRegistry()); - for(ArrayRef::const_iterator itr = exportList.begin(); - itr != exportList.end(); itr++) { + for(ArrayRef::const_iterator itr = ExportList.begin(); + itr != ExportList.end(); itr++) { ExternalNames.insert(*itr); } } @@ -210,6 +210,6 @@ ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } -ModulePass *llvm::createInternalizePass(ArrayRef el) { - return new InternalizePass(el); +ModulePass *llvm::createInternalizePass(ArrayRef ExportList) { + return new InternalizePass(ExportList); } -- cgit v1.1 From a83aeae350d822ec84ded34ff3fdf1e0917d3bfd Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 5 Sep 2013 08:19:58 +0000 Subject: Declare missing dependency on AliasAnalysis. Patch by Liu Xin! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190035 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/FunctionAttrs.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 1366883..20d1680 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -136,6 +136,7 @@ namespace { char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_AG_DEPENDENCY(CallGraph) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", -- cgit v1.1 From 466fa17aba4014d8ce1e42e9f5622f5b872949f3 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 5 Sep 2013 19:15:21 +0000 Subject: Remove unused argument. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 64cd515..4659271 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -80,7 +80,6 @@ namespace { bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, - const SmallPtrSet &PHIUsers, const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); @@ -1931,14 +1930,13 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (GV->isConstant() || !GV->hasInitializer()) return false; - return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); + return ProcessInternalGlobal(GV, GVI, GS); } /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, - const SmallPtrSet &PHIUsers, const GlobalStatus &GS) { // If this is a first class global and has only one accessing function // and this function is main (which we know is not recursive), we replace -- cgit v1.1 From 596aa123f46158639c836f1d53b89a9d7898c4b7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 5 Sep 2013 19:48:28 +0000 Subject: Consistently use dbgs() in debug printing git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 4 ++-- lib/Transforms/InstCombine/InstCombinePHI.cpp | 10 +++++----- lib/Transforms/InstCombine/InstCombineWorklist.h | 4 ++-- lib/Transforms/InstCombine/InstructionCombining.cpp | 16 ++++++++-------- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index d035c53..e29bf1a 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -271,7 +271,7 @@ public: if (&I == V) V = UndefValue::get(I.getType()); - DEBUG(errs() << "IC: Replacing " << I << "\n" + DEBUG(dbgs() << "IC: Replacing " << I << "\n" " with " << *V << '\n'); I.replaceAllUsesWith(V); @@ -283,7 +283,7 @@ public: // instruction. Instead, visit methods should return the value returned by // this function. Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(errs() << "IC: ERASE " << I << '\n'); + DEBUG(dbgs() << "IC: ERASE " << I << '\n'); assert(I.use_empty() && "Cannot erase instruction that is used!"); // Make sure that we reprocess all operands now that we reduced their diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index bd14e81..4689c42 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -688,10 +688,10 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // extracted out of it. First, sort the users by their offset and size. 
array_pod_sort(PHIUsers.begin(), PHIUsers.end()); - DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n'; - for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) - errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n'; - ); + DEBUG(dbgs() << "SLICING UP PHI: " << FirstPhi << '\n'; + for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i) + dbgs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] << '\n'; + ); // PredValues - This is a temporary used when rewriting PHI nodes. It is // hoisted out here to avoid construction/destruction thrashing. @@ -772,7 +772,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } PredValues.clear(); - DEBUG(errs() << " Made element PHI for offset " << Offset << ": " + DEBUG(dbgs() << " Made element PHI for offset " << Offset << ": " << *EltPHI << '\n'); ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 1109558..f84db27 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -37,7 +37,7 @@ public: /// in it. void Add(Instruction *I) { if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { - DEBUG(errs() << "IC: ADD: " << *I << '\n'); + DEBUG(dbgs() << "IC: ADD: " << *I << '\n'); Worklist.push_back(I); } } @@ -54,7 +54,7 @@ public: assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); WorklistMap.resize(NumEntries); - DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); + DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); for (unsigned Idx = 0; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; WorklistMap.insert(std::make_pair(I, Idx++)); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index f0a14a3..803c727 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2216,7 +2216,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { ++NumDeadInst; - DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); continue; } @@ -2224,7 +2224,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa(Inst->getOperand(0))) if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; @@ -2300,7 +2300,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; - DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); { @@ -2345,7 +2345,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Check to see if we can DCE the instruction. 
if (isInstructionTriviallyDead(I, TLI)) { - DEBUG(errs() << "IC: DCE: " << *I << '\n'); + DEBUG(dbgs() << "IC: DCE: " << *I << '\n'); EraseInstFromFunction(*I); ++NumDeadInst; MadeIRChange = true; @@ -2355,7 +2355,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa(I->getOperand(0))) if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { - DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. ReplaceInstUsesWith(*I, C); @@ -2403,13 +2403,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { std::string OrigI; #endif DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); - DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { - DEBUG(errs() << "IC: Old = " << *I << '\n' + DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); if (!I->getDebugLoc().isUnknown()) @@ -2438,7 +2438,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { EraseInstFromFunction(*I); } else { #ifndef NDEBUG - DEBUG(errs() << "IC: Mod = " << OrigI << '\n' + DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' << " New = " << *I << '\n'); #endif -- cgit v1.1 From ce8e4647bf4fff92bf5855b494461a039200b418 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 6 Sep 2013 00:18:43 +0000 Subject: Teach CodeGenPrepare about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190112 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/CodeGenPrepare.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 44804a2..67bf1bb 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -1035,7 +1035,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::IntToPtr: // This inttoptr is a no-op if the integer type is pointer sized. if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == - TLI.getPointerTy()) + TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: @@ -1573,9 +1573,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst); - Type *IntPtrTy = - TLI->getDataLayout()->getIntPtrType(AccessTy->getContext()); - + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); Value *Result = 0; // Start with the base register. Do this first so that subsequent address -- cgit v1.1 From 4598bd53ab89c3d120ad8249abbfdc7e2d64d291 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 6 Sep 2013 00:37:24 +0000 Subject: Use type helper functions. 
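
The pattern this commit applies, sketched as a standalone comparison (assuming the LLVM headers of this era; the function names here are illustrative, not from the patch). The Type helpers assert the underlying kind themselves, replacing the explicit casts:

----
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static unsigned numElementsVerbose(Value *V) {
  return cast<VectorType>(V->getType())->getNumElements();
}

static unsigned numElementsHelper(Value *V) {
  return V->getType()->getVectorNumElements(); // asserts V is a vector
}
----
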
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 2 +- lib/Transforms/Scalar/CodeGenPrepare.cpp | 3 +-- lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 8877b99..805c5d2 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -294,7 +294,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); - unsigned NumElts = cast(V->getType())->getNumElements(); + unsigned NumElts = V->getType()->getVectorNumElements(); if (isa(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 67bf1bb..c1a83bf 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -1418,8 +1418,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, Value *Address = User->getOperand(OpNo); if (!Address->getType()->isPointerTy()) return false; - Type *AddressAccessTy = - cast(Address->getType())->getElementType(); + Type *AddressAccessTy = Address->getType()->getPointerElementType(); // Do a match against the root of this address, ignoring profitability. This // will tell us if the addressing mode for the memory operation will diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c4c1423..0dea844 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3160,7 +3160,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// and use it to remove dead cases. static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); - unsigned Bits = cast(Cond->getType())->getBitWidth(); + unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); ComputeMaskedBits(Cond, KnownZero, KnownOne); -- cgit v1.1 From 0b3d39235aaed8bc66ccffb3942bf7b5f185329c Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 6 Sep 2013 22:47:05 +0000 Subject: TBAA: add isTBAAVtableAccess to MDNode so clients can call the function instead of having its own implementation. The implementation of isTBAAVtableAccess is in TypeBasedAliasAnalysis.cpp since it is related to the format of TBAA metadata. The path for struct-path tbaa will be exercised by test/Instrumentation/ThreadSanitizer/read_from_global.ll, vptr_read.ll, and vptr_update.ll when struct-path tbaa is on by default. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190216 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index e19ceba..2c8a7c4 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -240,12 +240,8 @@ bool ThreadSanitizer::doInitialization(Module &M) { } static bool isVtableAccess(Instruction *I) { - if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { - if (Tag->getNumOperands() < 1) return false; - if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; - } - } + if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) + return Tag->isTBAAVtableAccess(); return false; } -- cgit v1.1 From db3a9e64f856e3a233a427da1f3969fd3a65a438 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Mon, 9 Sep 2013 19:14:35 +0000 Subject: Revert patches to add case-range support for PR1255. The work on this project was left in an unfinished and inconsistent state. Hopefully someone will eventually get a chance to implement this feature, but in the meantime, it is better to put things back the way the were. I have left support in the bitcode reader to handle the case-range bitcode format, so that we do not lose bitcode compatibility with the llvm 3.3 release. This reverts the following commits: 155464, 156374, 156377, 156613, 156704, 156757, 156804 156808, 156985, 157046, 157112, 157183, 157315, 157384, 157575, 157576, 157586, 157612, 157810, 157814, 157815, 157880, 157881, 157882, 157884, 157887, 157901, 158979, 157987, 157989, 158986, 158997, 159076, 159101, 159100, 159200, 159201, 159207, 159527, 159532, 159540, 159583, 159618, 159658, 159659, 159660, 159661, 159703, 159704, 160076, 167356, 172025, 186736 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190328 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DebugIR.cpp | 1 + lib/Transforms/Utils/CodeExtractor.cpp | 3 +- lib/Transforms/Utils/Local.cpp | 45 ++++++++++------------ lib/Transforms/Utils/LowerSwitch.cpp | 62 +++++++++++++++++++----------- 4 files changed, 62 insertions(+), 49 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp index 651381d..9489bb2 100644 --- a/lib/Transforms/Instrumentation/DebugIR.cpp +++ b/lib/Transforms/Instrumentation/DebugIR.cpp @@ -25,6 +25,7 @@ #include "llvm/InstVisitor.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 82013f9..6f00864 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -665,8 +665,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1); - TheSwitch->removeCase(ToBeRemoved); + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } } diff --git 
a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index f2fac5e..8f7314d 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -196,33 +196,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. SwitchInst::CaseIt FirstCase = SI->case_begin(); - IntegersSubset& Case = FirstCase.getCaseValueEx(); - if (Case.isSingleNumber()) { - // FIXME: Currently work with ConstantInt based numbers. - Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - Case.getSingleNumber(0).toConstantInt(), - "cond"); - - // Insert the new branch. - BranchInst *NewBr = Builder.CreateCondBr(Cond, - FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); - MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); - if (MD && MD->getNumOperands() == 3) { - ConstantInt *SICase = dyn_cast(MD->getOperand(2)); - ConstantInt *SIDef = dyn_cast(MD->getOperand(1)); - assert(SICase && SIDef); - // The TrueWeight should be the weight for the single case of SI. - NewBr->setMetadata(LLVMContext::MD_prof, - MDBuilder(BB->getContext()). - createBranchWeights(SICase->getValue().getZExtValue(), - SIDef->getValue().getZExtValue())); - } + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + FirstCase.getCaseValue(), "cond"); - // Delete the old switch. - SI->eraseFromParent(); - return true; + // Insert the new branch. + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); } + + // Delete the old switch. + SI->eraseFromParent(); + return true; } return false; } diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 955b853..2d2a8a5 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -66,6 +66,18 @@ namespace { BasicBlock* OrigBlock, BasicBlock* Default); unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); }; + + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! 
+ struct CaseCmp { + bool operator () (const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { + + const ConstantInt* CI1 = cast<ConstantInt>(C1.Low); + const ConstantInt* CI2 = cast<ConstantInt>(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; } char LowerSwitch::ID = 0; @@ -147,7 +159,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT, + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); @@ -222,34 +234,40 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, // Clusterify - Transform simple list of Cases into list of CaseRange's unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { - - IntegersSubsetToBB TheClusterifier; + unsigned numCmps = 0; // Start with "simple" cases - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - BasicBlock *SuccBB = i.getCaseSuccessor(); - IntegersSubset CaseRanges = i.getCaseValueEx(); - TheClusterifier.add(CaseRanges, SuccBB); - } - - TheClusterifier.optimize(); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); - size_t numCmps = 0; - for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - IntegersSubsetToBB::Cluster &C = *i; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(CaseRange(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second)); - if (C.first.isSingleNumber()) + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size()>=2) + for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { + int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); + int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); + BasicBlock* nextBB = J->BB; + BasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue-currentValue==1) && (currentBB == nextBB)) { + I->High = J->High; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) // A range counts double, since it requires two compares. ++numCmps; } - return numCmps; + return numCmps; } // processSwitchInst - Replace the specified switch instruction with a sequence -- cgit v1.1 From 2c6ef1c4339c2961745bc3747753e4f2d23ee7c6 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Mon, 9 Sep 2013 20:56:48 +0000 Subject: [InstCombiner] Expose opportunities to merge subtract and comparison. Several architectures use the same instruction to perform both a comparison and a subtract. The instruction selection framework does not allow considering different basic blocks to expose such fusion opportunities. Therefore, these instructions are “merged” by CSE at MI IR level. 
To increase the likelihood of CSE applying in such situations, we reorder the operands of the comparison, when they have the same complexity, so that they match the order of the most frequent subtract. E.g., icmp A, B ... sub B, A git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 47 +++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 18a0872..29de6f7 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2037,14 +2037,59 @@ static APInt DemandedBitsLHSMask(ICmpInst &I, } +/// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst +/// should be swapped. +/// The decision is based on how many times these two operands are reused +/// as subtract operands and their positions in those instructions. +/// The rationale is that several architectures use the same instruction for +/// both subtract and cmp, thus it is better if the order of those operands +/// matches. +/// \return true if Op0 and Op1 should be swapped. +static bool swapMayExposeCSEOpportunities(const Value * Op0, + const Value * Op1) { + // Filter out pointer values as those cannot appear directly in a subtract. + // FIXME: we may want to go through inttoptrs or bitcasts. + if (Op0->getType()->isPointerTy()) + return false; + // Count every use of both Op0 and Op1 in a subtract. + // Each time Op0 is the first operand, count -1: swapping is bad, the + // subtract already has the same layout as the compare. + // Each time Op0 is the second operand, count +1: swapping is good, the + // subtract has a different layout than the compare. + // At the end, if the benefit is greater than 0, Op0 should come second to + // expose more CSE opportunities. + int GlobalSwapBenefits = 0; + for (Value::const_use_iterator UI = Op0->use_begin(), UIEnd = Op0->use_end(); UI != UIEnd; ++UI) { + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(*UI); + if (!BinOp || BinOp->getOpcode() != Instruction::Sub) + continue; + // If Op0 is the first argument, it is not beneficial to swap the + // arguments. + int LocalSwapBenefits = -1; + unsigned Op1Idx = 1; + if (BinOp->getOperand(Op1Idx) == Op0) { + Op1Idx = 0; + LocalSwapBenefits = 1; + } + if (BinOp->getOperand(Op1Idx) != Op1) + continue; + GlobalSwapBenefits += LocalSwapBenefits; + } + return GlobalSwapBenefits > 0; +} + Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + unsigned Op0Cplxity = getComplexity(Op0); + unsigned Op1Cplxity = getComplexity(Op1); /// Orders the operands of the compare so that they are listed from most /// complex to least complex. This puts constants before unary operators, /// before binary operators. - if (getComplexity(Op0) < getComplexity(Op1)) { + if (Op0Cplxity < Op1Cplxity || + (Op0Cplxity == Op1Cplxity && + swapMayExposeCSEOpportunities(Op0, Op1))) { I.swapOperands(); std::swap(Op0, Op1); Changed = true; -- cgit v1.1 From b1c54930cb1450534d6f3f0ebd81eb1e042a3246 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 9 Sep 2013 22:00:13 +0000 Subject: Don't shrink atomic ops to bool in GlobalOpt. 
LLVM IR doesn't currently allow atomic bool load/store operations, and the transformation is dubious anyway because it isn't profitable on all platforms. PR17163. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190357 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4659271..29c1b6a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2046,11 +2046,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. - if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { - ++NumShrunkToBool; - return true; + if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) { + if (GS.Ordering == NotAtomic) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + ++NumShrunkToBool; + return true; + } } + } } return false; -- cgit v1.1 From ffdee89a3c3b283ac02e6d0522e8e8d461bd3ff1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 10 Sep 2013 00:41:53 +0000 Subject: Use StringRef::npos for StringRef instead of std::string one git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 4e7055b..cbdd070 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -789,7 +789,7 @@ struct StrPBrkOpt : public LibCallOptimization { // Constant folding. if (HasS1 && HasS2) { size_t I = S1.find_first_of(S2); - if (I == std::string::npos) // No match. + if (I == StringRef::npos) // No match. return Constant::getNullValue(CI->getType()); return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); @@ -927,7 +927,7 @@ struct StrStrOpt : public LibCallOptimization { // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { - std::string::size_type Offset = SearchStr.find(ToFindStr); + size_t Offset = SearchStr.find(ToFindStr); if (Offset == StringRef::npos) // strstr("foo", "bar") -> null return Constant::getNullValue(CI->getType()); @@ -1384,7 +1384,7 @@ struct PrintFOpt : public LibCallOptimization { // printf("foo\n") --> puts("foo") if (FormatStr[FormatStr.size()-1] == '\n' && - FormatStr.find('%') == std::string::npos) { // no format characters. + FormatStr.find('%') == StringRef::npos) { // No format characters. // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); -- cgit v1.1 From f3d4b35f24f4e54b2cb99e06f47a958e5557d01e Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 10 Sep 2013 13:16:56 +0000 Subject: [asan] refactor the use-after-return API so that the size class is computed at compile time instead of at run-time. 
llvm part git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190407 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 42 ++++++++++++++++------ 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 2ee3e01..98794e1 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -59,6 +59,7 @@ static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G. static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa8000; +static const size_t kMinStackMallocSize = 1 << 6; // 64B static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; @@ -78,8 +79,9 @@ static const char *const kAsanInitName = "__asan_init_v3"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *const kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *const kAsanMappingScaleName = "__asan_mapping_scale"; -static const char *const kAsanStackMallocName = "__asan_stack_malloc"; -static const char *const kAsanStackFreeName = "__asan_stack_free"; +static const int kMaxAsanStackMallocSizeClass = 10; +static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_"; +static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_"; static const char *const kAsanGenPrefix = "__asan_gen_"; static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; @@ -396,7 +398,8 @@ struct FunctionStackPoisoner : public InstVisitor { uint64_t TotalStackSize; unsigned StackAlignment; - Function *AsanStackMallocFunc, *AsanStackFreeFunc; + Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1], + *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1]; Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc; // Stores a place and arguments of poisoning/unpoisoning call for alloca. 
@@ -1271,11 +1274,15 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) { void FunctionStackPoisoner::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); - AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction( - kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL)); - AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction( - kAsanStackFreeName, IRB.getVoidTy(), - IntptrTy, IntptrTy, IntptrTy, NULL)); + for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { + std::string Suffix = itostr(i); + AsanStackMallocFunc[i] = checkInterfaceFunction( + M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, + IntptrTy, IntptrTy, NULL)); + AsanStackFreeFunc[i] = checkInterfaceFunction(M.getOrInsertFunction( + kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, + IntptrTy, IntptrTy, NULL)); + } AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction( @@ -1344,12 +1351,24 @@ void FunctionStackPoisoner::poisonRedZones( } } +// Fake stack allocator (asan_fake_stack.h) has 11 size classes +// for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass +static int StackMallocSizeClass(uint64_t LocalStackSize) { + assert(LocalStackSize <= kMaxStackMallocSize); + uint64_t MaxSize = kMinStackMallocSize; + for (int i = 0; ; i++, MaxSize *= 2) + if (LocalStackSize <= MaxSize) + return i; + llvm_unreachable("impossible LocalStackSize"); +} + void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = TotalStackSize + (AllocaVec.size() + 1) * RedzoneSize(); bool DoStackMalloc = ASan.CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; + int StackMallocIdx = -1; assert(AllocaVec.size() > 0); Instruction *InsBefore = AllocaVec[0]; @@ -1367,7 +1386,9 @@ void FunctionStackPoisoner::poisonStack() { Value *LocalStackBase = OrigStackBase; if (DoStackMalloc) { - LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc, + StackMallocIdx = StackMallocSizeClass(LocalStackSize); + assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); + LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } @@ -1442,8 +1463,9 @@ void FunctionStackPoisoner::poisonStack() { // Unpoison the stack. poisonRedZones(AllocaVec, IRBRet, ShadowBase, false); if (DoStackMalloc) { + assert(StackMallocIdx >= 0); // In use-after-return mode, mark the whole stack frame unaddressable. - IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase, + IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase, ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } else if (HavePoisonedAllocas) { -- cgit v1.1 From 8e12d95d15e4140311919a3b60461817baf68ca5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 10 Sep 2013 18:46:15 +0000 Subject: LoopVectorize: PHI nodes are always at the beginning of a block, no need to scan the whole block. 
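
The idiom behind the change, sketched as a standalone helper (assuming the LLVM headers of this era; forEachPHI is an illustrative name, not from the patch). Because PHI nodes form a contiguous prefix of a basic block, the loop can stop at the first non-PHI instead of scanning every remaining instruction:

----
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void forEachPHI(BasicBlock *BB) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    PHINode *PN = dyn_cast<PHINode>(I);
    if (!PN)
      break; // was 'continue' before this commit; 'break' is sufficient
    (void)PN; // ... process the PHI here ...
  }
}
----
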
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190422 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0afc73e..dff3c0f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2175,7 +2175,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { for (BasicBlock::iterator LEI = LoopExitBlock->begin(), LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { PHINode *LCSSAPhi = dyn_cast(LEI); - if (!LCSSAPhi) continue; + if (!LCSSAPhi) break; // All PHINodes need to have a single entry edge, or two if // we already fixed them. @@ -2208,7 +2208,7 @@ void InnerLoopVectorizer::fixLCSSAPHIs() { for (BasicBlock::iterator LEI = LoopExitBlock->begin(), LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) { PHINode *LCSSAPhi = dyn_cast(LEI); - if (!LCSSAPhi) continue; + if (!LCSSAPhi) break; if (LCSSAPhi->getNumIncomingValues() == 1) LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()), LoopMiddleBlock); -- cgit v1.1 From 14807bd8c801f976c999e5a6699f31ee9642021a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 10 Sep 2013 19:55:24 +0000 Subject: Teach ScalarEvolution about pointer address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190425 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index d51e034..9c092e6 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1492,7 +1492,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter"); // We could handle pointer IVs other than i8*, but we need to compensate for // gep index scaling. See canExpandBackedgeTakenCount comments. - assert(SE->getSizeOfExpr( + assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), cast(GEPBase->getType())->getElementType())->isOne() && "unit stride pointer IV must be i8*"); -- cgit v1.1 From 63a9660a414299bbb93a1edf914c5200413adf73 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 10 Sep 2013 22:42:31 +0000 Subject: Remove unused functions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/CodeGenPrepare.cpp | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index c1a83bf..bd02a45 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -840,11 +840,6 @@ struct ExtAddrMode : public TargetLowering::AddrMode { } }; -static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { - AM.print(OS); - return OS; -} - void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; -- cgit v1.1 From 5912a125193b39916a8ea81fe75502869f1b3ef5 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 10 Sep 2013 23:09:24 +0000 Subject: Fix mistake in r190442. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190446 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/CodeGenPrepare.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index bd02a45..9c5633b 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -840,6 +840,13 @@ struct ExtAddrMode : public TargetLowering::AddrMode { } }; +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} +#endif + void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; -- cgit v1.1 From 8e5eb2b160687bbe3afbe522e4af7518c1bea73f Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 10 Sep 2013 23:45:25 +0000 Subject: Don't assert on invalid loop vectorization hint. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190450 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index dff3c0f..9e7335a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -864,15 +864,18 @@ private: unsigned Val = C->getZExtValue(); if (Hint == "width") { - assert(isPowerOf2_32(Val) && Val <= MaxVectorWidth && - "Invalid width metadata"); - Width = Val; + if (isPowerOf2_32(Val) && Val <= MaxVectorWidth) + Width = Val; + else + DEBUG(dbgs() << "LV: ignoring invalid width hint metadata"); } else if (Hint == "unroll") { - assert(isPowerOf2_32(Val) && Val <= MaxUnrollFactor && - "Invalid unroll metadata"); - Unroll = Val; - } else + if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor) + Unroll = Val; + else + DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata"); + } else { DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint); + } } }; -- cgit v1.1 From 22647a078301cd4e9533d6d12431c9a4ae7c29ac Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 11 Sep 2013 00:36:54 +0000 Subject: Get rid of unused isPodLike definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190461 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombinePHI.cpp | 2 -- lib/Transforms/Scalar/EarlyCSE.cpp | 10 ---------- 2 files changed, 12 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 4689c42..e99eaf3 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -604,8 +604,6 @@ namespace llvm { LHS.Width == RHS.Width; } }; - template <> - struct isPodLike { static const bool value = true; }; } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 3c08634..5266894 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -72,11 +72,6 @@ namespace { } namespace llvm { -// SimpleValue is POD. -template<> struct isPodLike { - static const bool value = true; -}; - template<> struct DenseMapInfo { static inline SimpleValue getEmptyKey() { return DenseMapInfo::getEmptyKey(); @@ -220,11 +215,6 @@ namespace { } namespace llvm { - // CallValue is POD. 
- template<> struct isPodLike { - static const bool value = true; - }; - template<> struct DenseMapInfo { static inline CallValue getEmptyKey() { return DenseMapInfo::getEmptyKey(); -- cgit v1.1 From f834dce7c7d13af85be5bc8b789c1d7793db8a58 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 11 Sep 2013 05:09:35 +0000 Subject: Add braces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 07d991b..20fccea 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -988,10 +988,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, CurLoop, BECount, - StoreSize, getAnalysis(), TheStore)){ + StoreSize, getAnalysis(), TheStore)) { Expander.clear(); // If we generated new code for the base pointer, clean up. deleteIfDeadInstruction(BasePtr, *SE, TLI); @@ -1007,17 +1006,21 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), SCEV::FlagNUW); - if (StoreSize != 1) + if (StoreSize != 1) { NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), SCEV::FlagNUW); + } Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); CallInst *NewCall; - if (SplatValue) - NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); - else { + if (SplatValue) { + NewCall = Builder.CreateMemSet(BasePtr, + SplatValue, + NumBytes, + StoreAlignment); + } else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), -- cgit v1.1 From 11250c1194830aa4cec72788dcd04f06cfe33f50 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 11 Sep 2013 05:09:42 +0000 Subject: Teach loop-idiom about address space pointer sizes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 33 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 20fccea..32af415 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -953,6 +953,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; + unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. if (SplatValue && TLI->has(LibFunc::memset) && @@ -961,8 +963,10 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. 
PatternValue = 0; - } else if (TLI->has(LibFunc::memset_pattern16) && + } else if (DestAS == 0 && + TLI->has(LibFunc::memset_pattern16) && (PatternValue = getMemSetPatternValue(StoredVal, *TD))) { + // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! SplatValue = 0; } else { @@ -978,14 +982,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, "loop-idiom"); + Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. - unsigned AddrSpace = cast(DestPtr->getType())->getAddressSpace(); Value *BasePtr = - Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), + Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy, Preheader->getTerminator()); if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, @@ -1001,7 +1006,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + Type *IntPtr = Builder.getIntPtrTy(TD, DestAS); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), @@ -1021,11 +1026,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, NumBytes, StoreAlignment); } else { + // Everything is emitted in default address space + Type *Int8PtrTy = DestInt8PtrTy; + Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), - Builder.getInt8PtrTy(), IntPtr, + Int8PtrTy, + Int8PtrTy, + IntPtr, (void*)0); // Otherwise we should form a memset_pattern16. PatternValue is known to be @@ -1035,7 +1044,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(true); // Ok to merge these. GV->setAlignment(16); - Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); + Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } @@ -1111,17 +1120,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
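// Illustrative note, assuming a DataLayout of "p:64:64:64-p1:32:32:32"
// (64-bit default pointers, 32-bit pointers in addrspace(1)): a strided
// store into addrspace(1) must size its trip count as i32,
//   Type *IntPtrTy = Builder.getIntPtrTy(TD, /*AddrSpace=*/1); // i32
// whereas TD->getIntPtrType(Context) always returns the integer type of
// the default address space (i64 here); that mismatch is what the change
// below corrects.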
- Type *IntPtr = TD->getIntPtrType(SI->getContext()); - BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1), SCEV::FlagNUW); if (StoreSize != 1) - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); CallInst *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, -- cgit v1.1 From cf16bae9fe1b566e3f6a011df6805426767610b7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 11 Sep 2013 07:29:40 +0000 Subject: Use type form of getIntPtrType This doesn't change anything since malloc always returns address space 0. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 29c1b6a..7ba7f86 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1504,7 +1504,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, unsigned TypeSize = TD->getTypeAllocSize(FieldTy); if (StructType *ST = dyn_cast(FieldTy)) TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), NElems, 0, @@ -1734,7 +1734,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is a fixed size array, transform the Malloc to be an alloc of // structs. malloc [100 x struct],1 -> malloc struct, 100 if (ArrayType *AT = dyn_cast(getMallocAllocatedType(CI, TLI))) { - Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CI->getType()); unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes(); Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); -- cgit v1.1 From 15f387c93ef8d5c23f110143996c8b9b4a089864 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 11 Sep 2013 17:42:27 +0000 Subject: Give internal classes hidden visibility. Worth 100k on a linux/x86_64 Release+Asserts clang. 
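For reference, a close approximation of the macro these headers used, as it appeared in llvm/Support/Compiler.h around this time (the exact guards may differ):

    #if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
    #define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
    #else
    #define LLVM_LIBRARY_VISIBILITY
    #endif

Hidden visibility keeps the annotated classes out of the shared object's exported symbol table, which is where the quoted size saving comes from.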
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190534 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DebugIR.h | 2 +- lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 2 +- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 4 ++-- lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h index 13774cf..06fea48 100644 --- a/lib/Transforms/Instrumentation/DebugIR.h +++ b/lib/Transforms/Instrumentation/DebugIR.h @@ -21,7 +21,7 @@ namespace llvm { -class DebugIR : public llvm::ModulePass { +class LLVM_LIBRARY_VISIBILITY DebugIR : public llvm::ModulePass { /// If true, write a source file to disk. bool WriteSourceToDisk; diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 4eac39d..43554b6 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -29,7 +29,7 @@ namespace objcarc { /// Declarations for ObjC runtime functions and constants. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. -class ARCRuntimeEntryPoints { +class LLVM_LIBRARY_VISIBILITY ARCRuntimeEntryPoints { public: enum EntryPointType { EPT_AutoreleaseRV, diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h index 41ccfe2..646ed55 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -35,8 +35,8 @@ namespace objcarc { /// TODO: This class could be generalized to know about other ObjC-specific /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing /// even though their offsets are dynamic. - class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { + class LLVM_LIBRARY_VISIBILITY ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { public: static char ID; // Class identification, replacement for typeinfo ObjCARCAliasAnalysis() : ImmutablePass(ID) { diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index a13fb9e..db96cf5 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -46,7 +46,7 @@ namespace objcarc { /// an ``independent provenance source'' of a pointer to determine whether or /// not two pointers have the same provenance source and thus could /// potentially be related. -class ProvenanceAnalysis { +class LLVM_LIBRARY_VISIBILITY ProvenanceAnalysis { AliasAnalysis *AA; typedef std::pair ValuePairTy; -- cgit v1.1 From 55c06ae7afa3f862a6bb4a4441fe485c135f5b5e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 11 Sep 2013 18:05:11 +0000 Subject: Revert "Give internal classes hidden visibility." It works with clang, but GCC has different rules so we can't make all of those hidden. This reverts commit r190534. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DebugIR.h | 2 +- lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 2 +- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 4 ++-- lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h index 06fea48..13774cf 100644 --- a/lib/Transforms/Instrumentation/DebugIR.h +++ b/lib/Transforms/Instrumentation/DebugIR.h @@ -21,7 +21,7 @@ namespace llvm { -class LLVM_LIBRARY_VISIBILITY DebugIR : public llvm::ModulePass { +class DebugIR : public llvm::ModulePass { /// If true, write a source file to disk. bool WriteSourceToDisk; diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 43554b6..4eac39d 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -29,7 +29,7 @@ namespace objcarc { /// Declarations for ObjC runtime functions and constants. These are initialized /// lazily to avoid cluttering up the Module with unused declarations. -class LLVM_LIBRARY_VISIBILITY ARCRuntimeEntryPoints { +class ARCRuntimeEntryPoints { public: enum EntryPointType { EPT_AutoreleaseRV, diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h index 646ed55..41ccfe2 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -35,8 +35,8 @@ namespace objcarc { /// TODO: This class could be generalized to know about other ObjC-specific /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing /// even though their offsets are dynamic. - class LLVM_LIBRARY_VISIBILITY ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { + class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { public: static char ID; // Class identification, replacement for typeinfo ObjCARCAliasAnalysis() : ImmutablePass(ID) { diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index db96cf5..a13fb9e 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -46,7 +46,7 @@ namespace objcarc { /// an ``independent provenance source'' of a pointer to determine whether or /// not two pointers have the same provenance source and thus could /// potentially be related. -class LLVM_LIBRARY_VISIBILITY ProvenanceAnalysis { +class ProvenanceAnalysis { AliasAnalysis *AA; typedef std::pair ValuePairTy; -- cgit v1.1 From 4f7e2c38e864d7eaeb407ac501478e9579624d1b Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Wed, 11 Sep 2013 19:25:43 +0000 Subject: Add getUnrollingPreferences to TTI Allow targets to customize the default behavior of the generic loop unrolling transformation. This will be used by the PowerPC backend when targeting the A2 core (which is in-order with a deep pipeline), and using more aggressive defaults is important. 
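A sketch of how a backend might implement the new hook; the class name and numbers are illustrative, not the actual PowerPC A2 tuning:

    void MyTargetTTI::getUnrollingPreferences(Loop *L,
                                              UnrollingPreferences &UP) const {
      // A deep in-order pipeline tolerates more code growth, so raise the
      // generic budget and allow partial/runtime unrolling by default.
      UP.Threshold = 300; // hypothetical budget
      UP.Partial = true;
      UP.Runtime = true;
    }

As the LoopUnroll changes below show, explicit -unroll-* flags from the user still override whatever the target requests.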
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190542 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80d060b..d47a3c3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -55,6 +55,9 @@ namespace { CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + UserAllowPartial = (P != -1) || + (UnrollAllowPartial.getNumOccurrences() > 0); + UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -75,7 +78,9 @@ namespace { unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; + bool UserCount; // CurrentCount is user-specified. bool UserThreshold; // CurrentThreshold is user-specified. + bool UserAllowPartial; // CurrentAllowPartial is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -145,16 +150,24 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { << "] Loop %" << Header->getName() << "\n"); (void)Header; + TargetTransformInfo::UnrollingPreferences UP; + UP.Threshold = CurrentThreshold; + UP.OptSizeThreshold = OptSizeUnrollThreshold; + UP.Count = CurrentCount; + UP.Partial = CurrentAllowPartial; + UP.Runtime = UnrollRuntime; + TTI.getUnrollingPreferences(L, UP); + // Determine the current unrolling threshold. While this is normally set // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - unsigned Threshold = CurrentThreshold; + unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; if (!UserThreshold && Header->getParent()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) - Threshold = OptSizeUnrollThreshold; + Threshold = UP.OptSizeThreshold; // Find trip count and trip multiple if count is not available unsigned TripCount = 0; @@ -167,11 +180,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { TripCount = SE->getSmallConstantTripCount(L, LatchBlock); TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } + + bool Runtime = UnrollRuntime.getNumOccurrences() == 0 ? + UP.Runtime : UnrollRuntime; + // Use a default unroll-count if the user doesn't specify a value // and the trip count is a run-time value. The default is different // for run-time or compile-time trip count loops. - unsigned Count = CurrentCount; - if (UnrollRuntime && CurrentCount == 0 && TripCount == 0) + unsigned Count = UserCount ? CurrentCount : UP.Count; + if (Runtime && Count == 0 && TripCount == 0) Count = UnrollRuntimeCount; if (Count == 0) { @@ -204,7 +221,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { + bool AllowPartial = UserAllowPartial ? 
CurrentAllowPartial : UP.Partial; + if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; @@ -215,7 +233,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { while (Count != 0 && TripCount%Count != 0) Count--; } - else if (UnrollRuntime) { + else if (Runtime) { // Reduce unroll count to be a lower power-of-two value while (Count != 0 && Size > Threshold) { Count >>= 1; @@ -231,7 +249,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM)) return false; return true; -- cgit v1.1 From 9a8392b8acf2803f344060f21b0ce4a07df3de7e Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 13 Sep 2013 08:16:06 +0000 Subject: Avoid a compiler warning about Found not being used when assertions are disabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/FunctionAttrs.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 20d1680..386cb71 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -367,6 +367,7 @@ namespace { } } assert(Found && "Capturing call-site captured nothing?"); + (void)Found; return false; } -- cgit v1.1 From 993a0c56ec166ed1e6cc5b9275f81bc3ca4ed880 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Fri, 13 Sep 2013 12:54:49 +0000 Subject: [msan] Add source file:line to stack origin reports. Compiler part. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190689 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e9b78ac..075a72f 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -213,7 +213,7 @@ class MemorySanitizer : public FunctionPass { Value *MsanCopyOriginFn; /// \brief Run-time helper that generates a new origin value for a stack /// allocation. - Value *MsanSetAllocaOriginFn; + Value *MsanSetAllocaOrigin4Fn; /// \brief Run-time helper that poisons stack on function entry. Value *MsanPoisonStackFn; /// \brief MSan runtime replacements for memmove, memcpy and memset. 
@@ -281,9 +281,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) { MsanCopyOriginFn = M.getOrInsertFunction( "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); - MsanSetAllocaOriginFn = M.getOrInsertFunction( - "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, - IRB.getInt8PtrTy(), NULL); + MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( + "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, + IRB.getInt8PtrTy(), IntptrTy, NULL); MsanPoisonStackFn = M.getOrInsertFunction( "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL); MemmoveFn = M.getOrInsertFunction( @@ -1734,10 +1734,12 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Descr = createPrivateNonConstGlobalForString(*F.getParent(), StackDescription.str()); - IRB.CreateCall3(MS.MsanSetAllocaOriginFn, + + IRB.CreateCall4(MS.MsanSetAllocaOrigin4Fn, IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), ConstantInt::get(MS.IntptrTy, Size), - IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())); + IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()), + IRB.CreatePointerCast(&F, MS.IntptrTy)); } } -- cgit v1.1 From 3748de6e2d7620794ff93b896d85aff6cc0ea9d2 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 14 Sep 2013 09:28:14 +0000 Subject: Remove the long, long defunct IR block placement pass. This pass was based on the previous (essentially unused) profiling infrastructure and the assumption that by ordering the basic blocks at the IR level in a particular way, the correct layout would happen in the end. This sometimes worked, and mostly didn't. It also was a really naive implementation of the classical paper that dates from when branch predictors were primarily directional and when loop structure wasn't commonly available. It also didn't factor into the equation non-fallthrough branches and other machine level details. Anyways, for all of these reasons and more, I wrote MachineBlockPlacement, which completely supercedes this pass. It both uses modern profile information infrastructure, and actually works. =] git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190748 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/BasicBlockPlacement.cpp | 152 -------------------------- lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/Scalar.cpp | 1 - 3 files changed, 154 deletions(-) delete mode 100644 lib/Transforms/Scalar/BasicBlockPlacement.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp deleted file mode 100644 index e755008..0000000 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- BasicBlockPlacement.cpp - Basic Block Code Layout optimization ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a very simple profile guided basic block placement -// algorithm. The idea is to put frequently executed blocks together at the -// start of the function, and hopefully increase the number of fall-through -// conditional branches. 
If there is no profile information for a particular -// function, this pass basically orders blocks in depth-first order -// -// The algorithm implemented here is basically "Algo1" from "Profile Guided Code -// Positioning" by Pettis and Hansen, except that it uses basic block counts -// instead of edge counts. This should be improved in many ways, but is very -// simple for now. -// -// Basically we "place" the entry block, then loop over all successors in a DFO, -// placing the most frequently executed successor until we run out of blocks. I -// told you this was _extremely_ simplistic. :) This is also much slower than it -// could be. When it becomes important, this pass will be rewritten to use a -// better algorithm, and then we can worry about efficiency. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "block-placement" -#include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include -using namespace llvm; - -STATISTIC(NumMoved, "Number of basic blocks moved"); - -namespace { - struct BlockPlacement : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - BlockPlacement() : FunctionPass(ID) { - initializeBlockPlacementPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - //AU.addPreserved(); // Does this work? - } - private: - /// PI - The profile information that is guiding us. - /// - ProfileInfo *PI; - - /// NumMovedBlocks - Every time we move a block, increment this counter. - /// - unsigned NumMovedBlocks; - - /// PlacedBlocks - Every time we place a block, remember it so we don't get - /// into infinite loops. - std::set PlacedBlocks; - - /// InsertPos - This an iterator to the next place we want to insert a - /// block. - Function::iterator InsertPos; - - /// PlaceBlocks - Recursively place the specified blocks and any unplaced - /// successors. - void PlaceBlocks(BasicBlock *BB); - }; -} - -char BlockPlacement::ID = 0; -INITIALIZE_PASS_BEGIN(BlockPlacement, "block-placement", - "Profile Guided Basic Block Placement", false, false) -INITIALIZE_AG_DEPENDENCY(ProfileInfo) -INITIALIZE_PASS_END(BlockPlacement, "block-placement", - "Profile Guided Basic Block Placement", false, false) - -FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); } - -bool BlockPlacement::runOnFunction(Function &F) { - PI = &getAnalysis(); - - NumMovedBlocks = 0; - InsertPos = F.begin(); - - // Recursively place all blocks. - PlaceBlocks(F.begin()); - - PlacedBlocks.clear(); - NumMoved += NumMovedBlocks; - return NumMovedBlocks != 0; -} - - -/// PlaceBlocks - Recursively place the specified blocks and any unplaced -/// successors. -void BlockPlacement::PlaceBlocks(BasicBlock *BB) { - assert(!PlacedBlocks.count(BB) && "Already placed this block!"); - PlacedBlocks.insert(BB); - - // Place the specified block. - if (&*InsertPos != BB) { - // Use splice to move the block into the right place. This avoids having to - // remove the block from the function then readd it, which causes a bunch of - // symbol table traffic that is entirely pointless. 
- Function::BasicBlockListType &Blocks = BB->getParent()->getBasicBlockList(); - Blocks.splice(InsertPos, Blocks, BB); - - ++NumMovedBlocks; - } else { - // This block is already in the right place, we don't have to do anything. - ++InsertPos; - } - - // Keep placing successors until we run out of ones to place. Note that this - // loop is very inefficient (N^2) for blocks with many successors, like switch - // statements. FIXME! - while (1) { - // Okay, now place any unplaced successors. - succ_iterator SI = succ_begin(BB), E = succ_end(BB); - - // Scan for the first unplaced successor. - for (; SI != E && PlacedBlocks.count(*SI); ++SI) - /*empty*/; - if (SI == E) return; // No more successors to place. - - double MaxExecutionCount = PI->getExecutionCount(*SI); - BasicBlock *MaxSuccessor = *SI; - - // Scan for more frequently executed successors - for (; SI != E; ++SI) - if (!PlacedBlocks.count(*SI)) { - double Count = PI->getExecutionCount(*SI); - if (Count > MaxExecutionCount || - // Prefer to not disturb the code. - (Count == MaxExecutionCount && *SI == &*InsertPos)) { - MaxExecutionCount = Count; - MaxSuccessor = *SI; - } - } - - // Now that we picked the maximally executed successor, place it. - PlaceBlocks(MaxSuccessor); - } -} diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 7fa7807..3b89fd4 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,6 +1,5 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp - BasicBlockPlacement.cpp CodeGenPrepare.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 952811b..0c3ffbc 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -28,7 +28,6 @@ using namespace llvm; /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); - initializeBlockPlacementPass(Registry); initializeCodeGenPreparePass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); -- cgit v1.1 From 4873c157f3b6776968f63f66bc76f839bdaf128e Mon Sep 17 00:00:00 2001 From: Robert Wilhelm Date: Sat, 14 Sep 2013 09:34:59 +0000 Subject: Fix spelling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190750 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/FlattenCFG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 9cbe15d..0beb6fd 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -372,7 +372,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// Check whether \param BB is the merge block of a if-region. If yes, check /// whether there exists an adjacent if-region upstream, the two if-regions -/// contain identical instuctions and can be legally merged. \returns true if +/// contain identical instructions and can be legally merged. \returns true if /// the two if-regions are merged. /// /// From: -- cgit v1.1 From 94ee55d4b39d6506cf4e0f4e4b1c0b7fbbfeaed5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 15 Sep 2013 22:04:42 +0000 Subject: Replace some unnecessary vector copies with references. 
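Illustrative of the general pattern (the hunk below simply deletes one such copy that was never read):

    // Materializes a full copy of the block list:
    std::vector<BasicBlock *> Blocks = L->getBlocks();
    // Binds a reference to the loop's existing storage instead:
    const std::vector<BasicBlock *> &BlocksRef = L->getBlocks();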
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190770 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index cb581b3..0b679fe 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -239,8 +239,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << "!\n"); } - std::vector LoopBlocks = L->getBlocks(); - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); -- cgit v1.1 From 1e3037f0be430ef2339838bbdede11f45658bd82 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 16 Sep 2013 01:08:15 +0000 Subject: Implement function prefix data as an IR feature. Previous discussion: http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-July/063909.html Differential Revision: http://llvm-reviews.chandlerc.com/D1191 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190773 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalDCE.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 201f320..901295d 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -179,6 +179,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // any globals used will be marked as needed. Function *F = cast(G); + if (F->hasPrefixData()) + MarkUsedGlobalsAsNeeded(F->getPrefixData()); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U) -- cgit v1.1 From e5c8c5a1bcecff7e2aa60672be6af2062ad63e6a Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 16 Sep 2013 13:24:32 +0000 Subject: [msan] Check return value of main(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190782 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 075a72f..9a67cfc 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -427,6 +427,7 @@ struct MemorySanitizerVisitor : public InstVisitor { bool LoadShadow; bool PoisonStack; bool PoisonUndef; + bool CheckReturnValue; OwningPtr VAHelper; struct ShadowOriginAndInsertPoint { @@ -449,6 +450,9 @@ struct MemorySanitizerVisitor : public InstVisitor { LoadShadow = SanitizeFunction; PoisonStack = SanitizeFunction && ClPoisonStack; PoisonUndef = SanitizeFunction && ClPoisonUndef; + // FIXME: Consider using SpecialCaseList to specify a list of functions that + // must always return fully initialized values. For now, we hardcode "main". + CheckReturnValue = SanitizeFunction && (F.getName() == "main"); DEBUG(if (!InsertChecks) dbgs() << "MemorySanitizer is not inserting checks into '" @@ -1686,12 +1690,17 @@ struct MemorySanitizerVisitor : public InstVisitor { void visitReturnInst(ReturnInst &I) { IRBuilder<> IRB(&I); - if (Value *RetVal = I.getReturnValue()) { - // Set the shadow for the RetVal. 
+ Value *RetVal = I.getReturnValue(); + if (!RetVal) return; + Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); + if (CheckReturnValue) { + insertCheck(RetVal, &I); + Value *Shadow = getCleanShadow(RetVal); + IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); + } else { Value *Shadow = getShadow(RetVal); - Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); - DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n"); IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); + // FIXME: make it conditional if ClStoreCleanOrigin==0 if (MS.TrackOrigins) IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB)); } -- cgit v1.1 From 5721d2f674008cd2cfa5696441e46daa035408b3 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Mon, 16 Sep 2013 16:17:24 +0000 Subject: Don't vectorize if there are outside loop users of the induction variable. We would have to compute the pre increment value, either by computing it on every loop iteration or by splitting the edge out of the loop and inserting a computation for it there. For now, just give up vectorizing such loops. Fixes PR17179. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190790 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9e7335a..1d82c7b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2866,6 +2866,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found an induction variable.\n"); Inductions[Phi] = InductionInfo(StartValue, IK); + + // Until we explicitly handle the case of an induction variable with + // an outside loop user we have to give up vectorizing this loop. + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + return false; + continue; } -- cgit v1.1 From 4b28ee208895d2a9c98b9e63d0c39985500e9291 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 16 Sep 2013 22:43:16 +0000 Subject: MemCpyOptimizer: Use max legal int size instead of pointer size If there are no legal integers, assume 1 byte. This makes more sense than using the pointer size as a guess for the maximum GPR width. It is conceivable to want to use some 64-bit pointers on a target where 64-bit integers aren't legal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190817 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 8f61ffd..9912d3d 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -170,14 +170,17 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { // pessimize the llvm optimizer. // // Since we don't have perfect knowledge here, make some assumptions: assume - // the maximum GPR width is the same size as the pointer size and assume that - // this width can be stored. If so, check to see whether we will end up - // actually reducing the number of stores used. + // the maximum GPR width is the same size as the largest legal integer + // size. If so, check to see whether we will end up actually reducing the + // number of stores used. 
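// Worked example: for a 16-byte range on a target whose largest legal
// integer is i32 (MaxIntSize = 4), NumPointerStores = 4 and
// NumByteStores = 0, versus 16 one-byte stores without merging; a target
// that declares no legal integers at all gets MaxIntSize clamped to 1
// below so the arithmetic stays well defined.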
unsigned Bytes = unsigned(End-Start); - unsigned NumPointerStores = Bytes/TD.getPointerSize(); + unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); + if (MaxIntSize == 0) + MaxIntSize = 1; + unsigned NumPointerStores = Bytes / MaxIntSize; // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); + unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 -- cgit v1.1 From 80361492ae7ea9fedbb5a55c72d4aea6a3d600b1 Mon Sep 17 00:00:00 2001 From: Stepan Dyatkovskiy Date: Tue, 17 Sep 2013 09:36:11 +0000 Subject: Bugfix for PR17099: Wrong cast operation. MergeFunctions emits Bitcast instead of pointer-to-integer operation. Patch fixes MergeFunctions::writeThunk function. It replaces unconditional Bitcast creation with "Value* createCast(...)" method, that checks operand types and selects proper instruction. See unit-test as example. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190859 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 4ce749c..0f09b90 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -713,6 +713,19 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { writeThunk(F, G); } +// Helper for writeThunk, +// Selects proper bitcast operation, +// but a bit simplier then CastInst::getCastOpcode. +static Value* createCast(IRBuilder &Builder, Value *V, Type *DestTy) { + Type *SrcTy = V->getType(); + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + return Builder.CreateIntToPtr(V, DestTy); + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + return Builder.CreatePtrToInt(V, DestTy); + else + return Builder.CreateBitCast(V, DestTy); +} + // Replace G with a simple tail call to bitcast(F). Also replace direct uses // of G with bitcast(F). Deletes G. void MergeFunctions::writeThunk(Function *F, Function *G) { @@ -738,7 +751,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i))); + Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); ++i; } @@ -748,13 +761,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { - Type *RetTy = NewG->getReturnType(); - if (CI->getType()->isIntegerTy() && RetTy->isPointerTy()) - Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy)); - else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy()) - Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy)); - else - Builder.CreateRet(Builder.CreateBitCast(CI, RetTy)); + Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType())); } NewG->copyAttributesFrom(G); -- cgit v1.1 From 671c3ba921d5b8271307a8caa5e29f512d2e8e82 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 17 Sep 2013 12:14:50 +0000 Subject: [asan] inline the calls to __asan_stack_free_* with small sizes. 
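A note on the MergeFunctions createCast helper above: it dispatches purely on the source and destination types. The values and types here are illustrative:

    createCast(Builder, I64Val, Int8PtrTy); // emits inttoptr
    createCast(Builder, PtrVal, I64Ty);     // emits ptrtoint
    createCast(Builder, FloatVal, I32Ty);   // falls through to bitcast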
Yet another 10%-20% speedup for use-after-return git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190863 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 51 ++++++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 98794e1..75ecc94 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -88,10 +88,12 @@ static const char *const kAsanPoisonStackMemoryName = static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; +// These constants must match the definitions in the run-time library. static const int kAsanStackLeftRedzoneMagic = 0xf1; static const int kAsanStackMidRedzoneMagic = 0xf2; static const int kAsanStackRightRedzoneMagic = 0xf3; static const int kAsanStackPartialRedzoneMagic = 0xf4; +static const int kAsanStackAfterReturnMagic = 0xf5; // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; @@ -519,6 +521,9 @@ struct FunctionStackPoisoner : public InstVisitor { void poisonRedZones(const ArrayRef &AllocaVec, IRBuilder<> &IRB, Value *ShadowBase, bool DoPoison); void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison); + + void SetShadowToStackAfterReturnInlined(IRBuilder<> &IRB, Value *ShadowBase, + int Size); }; } // namespace @@ -1362,6 +1367,22 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) { llvm_unreachable("impossible LocalStackSize"); } +// Set Size bytes starting from ShadowBase to kAsanStackAfterReturnMagic. +// We can not use MemSet intrinsic because it may end up calling the actual +// memset. Size is a multiple of 8. +// Currently this generates 8-byte stores on x86_64; it may be better to +// generate wider stores. +void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( + IRBuilder<> &IRB, Value *ShadowBase, int Size) { + assert(!(Size % 8)); + assert(kAsanStackAfterReturnMagic == 0xf5); + for (int i = 0; i < Size; i += 8) { + Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)); + IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL), + IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo())); + } +} + void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = TotalStackSize + (AllocaVec.size() + 1) * RedzoneSize(); @@ -1465,9 +1486,33 @@ void FunctionStackPoisoner::poisonStack() { if (DoStackMalloc) { assert(StackMallocIdx >= 0); // In use-after-return mode, mark the whole stack frame unaddressable. - IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase, - ConstantInt::get(IntptrTy, LocalStackSize), - OrigStackBase); + if (StackMallocIdx <= 4) { + // For small sizes inline the whole thing: + // if LocalStackBase != OrigStackBase: + // memset(ShadowBase, kAsanStackAfterReturnMagic, ShadowSize); + // **SavedFlagPtr(LocalStackBase) = 0 + // FIXME: if LocalStackBase != OrigStackBase don't call poisonRedZones. 
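// Worked example, assuming the default shadow scale of 3: with
// StackMallocIdx = 0, ClassSize = kMinStackMallocSize << 0 = 64 bytes and
// ClassSize >> Mapping.Scale = 8 shadow bytes, i.e. exactly one inlined
// 8-byte store of 0xf5f5f5f5f5f5f5f5. At the inlining cutoff,
// StackMallocIdx = 4 gives ClassSize = 1024 and 1024 >> 3 = 128 shadow
// bytes, i.e. 16 stores; anything larger calls __asan_stack_free_*.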
+ Value *Cmp = IRBRet.CreateICmpNE(LocalStackBase, OrigStackBase); + TerminatorInst *PoisonTerm = + SplitBlockAndInsertIfThen(cast(Cmp), false); + IRBuilder<> IRBPoison(PoisonTerm); + int ClassSize = kMinStackMallocSize << StackMallocIdx; + SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase, + ClassSize >> Mapping.Scale); + Value *SavedFlagPtrPtr = IRBPoison.CreateAdd( + LocalStackBase, + ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8)); + Value *SavedFlagPtr = IRBPoison.CreateLoad( + IRBPoison.CreateIntToPtr(SavedFlagPtrPtr, IntptrPtrTy)); + IRBPoison.CreateStore( + Constant::getNullValue(IRBPoison.getInt8Ty()), + IRBPoison.CreateIntToPtr(SavedFlagPtr, IRBPoison.getInt8PtrTy())); + } else { + // For larger frames call __asan_stack_free_*. + IRBRet.CreateCall3(AsanStackFreeFunc[StackMallocIdx], LocalStackBase, + ConstantInt::get(IntptrTy, LocalStackSize), + OrigStackBase); + } } else if (HavePoisonedAllocas) { // If we poisoned some allocas in llvm.lifetime analysis, // unpoison whole stack frame now. -- cgit v1.1 From 0119f3df9c2016664540f8e3be89fe5cd54cbb07 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 17 Sep 2013 16:57:34 +0000 Subject: [InstCombiner] Slice a big load in two loads when the elements are next to each other in memory. The motivation was to get rid of truncate and shift right instructions that get in the way of paired load or floating point load. E.g., Consider the following example: struct Complex { float real; float imm; }; When accessing a complex, llvm was generating a 64-bits load and the imm field was obtained by a trunc(lshr) sequence, resulting in poor code generation, at least for x86. The idea is to declare that two load instructions is the canonical form for loading two arithmetic type, which are next to each other in memory. Two scalar loads at a constant offset from each other are pretty easy to detect for the sorts of passes that like to mess with loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190870 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 285 +++++++++++++++++++++ 1 file changed, 285 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 88e16e9..0579c27 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -16,10 +16,20 @@ #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +/// Hidden option to stress test load slicing, i.e., when this option +/// is enabled, load slicing bypasses most of its profitability guards. +/// It will also generate, uncanonalized form of slicing. +static cl::opt +StressLoadSlicing("instcombine-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -337,6 +347,274 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, return 0; } +namespace { + /// \brief Helper structure used to slice a load in smaller loads. + struct LoadedSlice { + // The last instruction that represent the slice. 
This should be a + // truncate instruction. + Instruction *Inst; + // The original load instruction. + LoadInst *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + + LoadedSlice(Instruction *Inst = NULL, LoadInst *Origin = NULL, + unsigned Shift = 0) + : Inst(Inst), Origin(Origin), Shift(Shift) {} + + LoadedSlice(const LoadedSlice& LS) : Inst(LS.Inst), Origin(LS.Origin), + Shift(LS.Shift) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getType()->getPrimitiveSizeInBits(); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getType()->getPrimitiveSizeInBits() <= BitWidth && + "Extracted slice is smaller than the whole type!"); + APInt UsedBits(Inst->getType()->getPrimitiveSizeInBits(), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits, whose layout is defined by \p IsBigEndian. + uint64_t getOffsetFromBase(bool IsBigEndian) const { + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not support."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getType()->getPrimitiveSizeInBits() / 8; + assert(!(Origin->getType()->getPrimitiveSizeInBits() & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions. + /// \return The last instruction of the sequence used to load the slice. + Instruction *loadSlice(InstCombiner::BuilderTy &Builder, + bool IsBigEndian) const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + Value *BaseAddr = Origin->getOperand(0); + unsigned Alignment = Origin->getAlignment(); + Builder.SetInsertPoint(Origin); + // Assume we are looking at a chunk of bytes. + // BaseAddr = (i8*)BaseAddr. + BaseAddr = Builder.CreateBitCast(BaseAddr, Builder.getInt8PtrTy(), + "raw_cast"); + // Get the offset in that chunk of bytes w.r.t. the endianess. + uint64_t Offset = getOffsetFromBase(IsBigEndian); + if (Offset) { + APInt APOffset(64, Offset); + // BaseAddr = BaseAddr + Offset. + BaseAddr = Builder.CreateInBoundsGEP(BaseAddr, Builder.getInt(APOffset), + "raw_idx"); + } + + // Create the type of the loaded slice according to its size. 
+ Type *SliceType = + Type::getIntNTy(Origin->getContext(), getLoadedSize() * 8); + + // Bit cast the raw pointer to the pointer type of the slice. + BaseAddr = Builder.CreateBitCast(BaseAddr, SliceType->getPointerTo(), + "cast"); + + // Compute the new alignment. + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + + // Create the load for the slice. + Instruction *LastInst = Builder.CreateAlignedLoad(BaseAddr, Alignment, + Inst->getName()+".val"); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + Type * FinalType = Inst->getType(); + if (SliceType != FinalType) + LastInst = cast(Builder.CreateZExt(LastInst, FinalType)); + + // Update the IR to reflect the new access to the slice. + Inst->replaceAllUsesWith(LastInst); + + return LastInst; + } + + /// \brief Check if it would be profitable to expand this slice as an + /// independant load. + bool isProfitable() const { + // Slicing is assumed to be profitable iff the chains leads to arithmetic + // operations. + SmallVector Uses; + Uses.push_back(Inst); + do { + const Instruction *Use = Uses.pop_back_val(); + for (Value::const_use_iterator UseIt = Use->use_begin(), + UseItEnd = Use->use_end(); UseIt != UseItEnd; ++UseIt) { + const Instruction *UseOfUse = cast(*UseIt); + // Consider these instructions as arithmetic operations. + if (isa(UseOfUse) || + isa(UseOfUse) || + isa(UseOfUse) || + isa(UseOfUse)) + return true; + // No need to check if the Use has already been checked as we do not + // insert any PHINode. + Uses.push_back(UseOfUse); + } + } while (!Uses.empty()); + DEBUG(dbgs() << "IC: Not a profitable slice " << *Inst << '\n'); + return false; + } + }; +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Unless StressLoadSlicing is specified, this also returns false +/// when slicing is not in the canonical form. +/// The canonical form of sliced load is (1) two loads, +/// which are (2) next to each other in memory. +/// +/// FIXME: We may want to allow more slices to be created but +/// this means other passes should know how to deal with all those +/// slices. +/// FIXME: We may want to split loads to different types, e.g., +/// int vs. float. +static bool +isSlicingProfitable(const SmallVectorImpl &LoadedSlices, + const APInt &UsedBits) { + unsigned NbOfSlices = LoadedSlices.size(); + // Check (1). + if (!StressLoadSlicing && NbOfSlices != 2) + return false; + + // Check (2). + if (!StressLoadSlicing && !UsedBits.isAllOnesValue()) { + // Get rid of the unused bits on the right. + APInt MemoryLayout = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (MemoryLayout.countLeadingZeros()) + MemoryLayout = MemoryLayout.trunc(MemoryLayout.getActiveBits()); + // Check that the chunk of memory is completely used. + if (!MemoryLayout.isAllOnesValue()) + return false; + } + + unsigned NbOfProfitableSlices = 0; + for (unsigned CurrSlice = 0; CurrSlice < NbOfSlices; ++CurrSlice) { + if (LoadedSlices[CurrSlice].isProfitable()) + ++NbOfProfitableSlices; + else if (!StressLoadSlicing) + return false; + } + // In Stress mode, we may have 0 profitable slice. + // Check that here. + // In non-Stress mode, all the slices are profitable at this point. + return NbOfProfitableSlices > 0; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. 
+/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +static Instruction *sliceUpLoadInst(LoadInst &LI, + InstCombiner::BuilderTy &Builder, + DataLayout &TD) { + assert(LI.isSimple() && "We are trying to transform a non-simple load!"); + + // FIXME: If we want to support floating point and vector types, we should + // support bitcast and extract/insert element instructions. + Type *LITy = LI.getType(); + if (!LITy->isIntegerTy()) return 0; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LITy->getPrimitiveSizeInBits(), 0); + + SmallVector LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (Value::use_iterator UI = LI.use_begin(), UIEnd = LI.use_end(); + UI != UIEnd; ++UI) { + Instruction *User = cast(*UI); + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == Instruction::LShr && User->hasOneUse() && + isa(User->getOperand(1))) { + Shift = cast(User->getOperand(1))->getZExtValue(); + User = User->use_back(); + } + + // At this point, User is a TruncInst, iff we encountered, trunc or + // trunc(lshr). + if (!isa(User)) + return 0; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted with a non 8-bits multiple, the slice + // will be accross several bytes. We do not support that. + unsigned Width = User->getType()->getPrimitiveSizeInBits(); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, &LI, Shift); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return 0; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits)) + return 0; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + bool IsBigEndian = TD.isBigEndian(); + for (SmallVectorImpl::const_iterator LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); LSIt != LSItEnd; ++LSIt) { + Instruction *SliceInst = LSIt->loadSlice(Builder, IsBigEndian); + (void)SliceInst; + DEBUG(dbgs() << "IC: Replacing " << *LSIt->Inst << "\n" + " with " << *SliceInst << '\n'); + } + return 0; // Don't do anything with LI. +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); @@ -443,6 +721,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } } + + // Try to split a load in smaller non-overlapping loads to expose independant + // chain of computations and get rid of trunc/lshr sequence of code. + // The data layout is required for that operation, as code generation will + // change with respect to endianess. 
+ if (TD) + return sliceUpLoadInst(LI, *Builder, *TD); return 0; } -- cgit v1.1 From 3c940067424204ecffb48ddc269895d48442279a Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 17 Sep 2013 17:03:29 +0000 Subject: SLPVectorizer: Don't vectorize phi nodes that use invoke values We can't insert an insertelement after an invoke. We would have to split a critical edge. So when we see a phi node that uses an invoke we just give up. radar://14990770 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1f288bc..b287ca7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -639,6 +639,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { switch (Opcode) { case Instruction::PHI: { PHINode *PH = dyn_cast(VL0); + + // Check for terminator values (e.g. invoke). + for (unsigned j = 0; j < VL.size(); ++j) + for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { + TerminatorInst *Term = dyn_cast(cast(VL[j])->getIncomingValue(i)); + if (Term) { + DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); + newTreeEntry(VL, false); + return; + } + } + newTreeEntry(VL, true); DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); -- cgit v1.1 From 3e1c40de7f33db639698982bb58d8c2c8d7d5780 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 17 Sep 2013 21:10:14 +0000 Subject: Cleanup handling of constant function casts. Some of this code is no longer necessary since int<->ptr casts are no longer occur as of r187444. This also fixes handling vectors of pointers, and adds a bunch of new testcases for vectors and address spaces. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190885 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 32 +++++++------------------ 1 file changed, 8 insertions(+), 24 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9f74fd6..beb63e8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -999,20 +999,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Check to see if we are changing the return type... if (OldRetTy != NewRetTy) { - if (Callee->isDeclaration() && - // Conversion is ok if changing from one pointer type to another or from - // a pointer to an integer of the same size. - !((OldRetTy->isPointerTy() || !TD || - OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (NewRetTy->isPointerTy() || !TD || - NewRetTy == TD->getIntPtrType(Caller->getContext())))) - return false; // Cannot transform this return value. + if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) { + if (Callee->isDeclaration()) + return false; // Cannot transform this return value. - if (!Caller->use_empty() && - // void -> non-void is handled specially - !NewRetTy->isVoidTy() && - !CastInst::isBitCastable(NewRetTy, OldRetTy)) + if (!Caller->use_empty() && + // void -> non-void is handled specially + !NewRetTy->isVoidTy()) return false; // Cannot transform this return value. 
+ } if (!CallerPAL.isEmpty() && !Caller->use_empty()) { AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); @@ -1045,9 +1040,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Type *ParamTy = FT->getParamType(i); Type *ActTy = (*AI)->getType(); - if (!CastInst::isBitCastable(ActTy, ParamTy)) { + if (!CastInst::isBitCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - } if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). hasAttributes(AttributeFuncs:: @@ -1068,16 +1062,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { TD->getTypeAllocSize(ParamPTy->getElementType())) return false; } - - // Converting from one pointer type to another or between a pointer and an - // integer of the same size is safe even if we do not have a body. - bool isConvertible = ActTy == ParamTy || - (TD && ((ParamTy->isPointerTy() || - ParamTy == TD->getIntPtrType(Caller->getContext())) && - (ActTy->isPointerTy() || - ActTy == TD->getIntPtrType(Caller->getContext())))); - if (Callee->isDeclaration() && !isConvertible) - return false; } if (Callee->isDeclaration()) { -- cgit v1.1 From 5383a377476529e55e3c244e83ec8ad66159cc22 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Tue, 17 Sep 2013 22:01:26 +0000 Subject: Revert the load slicing done in r190870. To avoid regressions with bitfield optimizations, this slicing should take place later, like ISel time. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190891 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 285 --------------------- 1 file changed, 285 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0579c27..88e16e9 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -16,20 +16,10 @@ #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; -/// Hidden option to stress test load slicing, i.e., when this option -/// is enabled, load slicing bypasses most of its profitability guards. -/// It will also generate, uncanonalized form of slicing. -static cl::opt -StressLoadSlicing("instcombine-stress-load-slicing", cl::Hidden, - cl::desc("Bypass the profitability model of load " - "slicing"), - cl::init(false)); - STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -347,274 +337,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, return 0; } -namespace { - /// \brief Helper structure used to slice a load in smaller loads. - struct LoadedSlice { - // The last instruction that represent the slice. This should be a - // truncate instruction. - Instruction *Inst; - // The original load instruction. - LoadInst *Origin; - // The right shift amount in bits from the original load. - unsigned Shift; - - LoadedSlice(Instruction *Inst = NULL, LoadInst *Origin = NULL, - unsigned Shift = 0) - : Inst(Inst), Origin(Origin), Shift(Shift) {} - - LoadedSlice(const LoadedSlice& LS) : Inst(LS.Inst), Origin(LS.Origin), - Shift(LS.Shift) {} - - /// \brief Get the bits used in a chunk of bits \p BitWidth large. 
- /// \return Result is \p BitWidth and has used bits set to 1 and - /// not used bits set to 0. - APInt getUsedBits() const { - // Reproduce the trunc(lshr) sequence: - // - Start from the truncated value. - // - Zero extend to the desired bit width. - // - Shift left. - assert(Origin && "No original load to compare against."); - unsigned BitWidth = Origin->getType()->getPrimitiveSizeInBits(); - assert(Inst && "This slice is not bound to an instruction"); - assert(Inst->getType()->getPrimitiveSizeInBits() <= BitWidth && - "Extracted slice is smaller than the whole type!"); - APInt UsedBits(Inst->getType()->getPrimitiveSizeInBits(), 0); - UsedBits.setAllBits(); - UsedBits = UsedBits.zext(BitWidth); - UsedBits <<= Shift; - return UsedBits; - } - - /// \brief Get the size of the slice to be loaded in bytes. - unsigned getLoadedSize() const { - unsigned SliceSize = getUsedBits().countPopulation(); - assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); - return SliceSize / 8; - } - - /// \brief Get the offset in bytes of this slice in the original chunk of - /// bits, whose layout is defined by \p IsBigEndian. - uint64_t getOffsetFromBase(bool IsBigEndian) const { - assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not support."); - uint64_t Offset = Shift / 8; - unsigned TySizeInBytes = Origin->getType()->getPrimitiveSizeInBits() / 8; - assert(!(Origin->getType()->getPrimitiveSizeInBits() & 0x7) && - "The size of the original loaded type is not a multiple of a" - " byte."); - // If Offset is bigger than TySizeInBytes, it means we are loading all - // zeros. This should have been optimized before in the process. - assert(TySizeInBytes > Offset && - "Invalid shift amount for given loaded size"); - if (IsBigEndian) - Offset = TySizeInBytes - Offset - getLoadedSize(); - return Offset; - } - - /// \brief Generate the sequence of instructions to load the slice - /// represented by this object and redirect the uses of this slice to - /// this new sequence of instructions. - /// \pre this->Inst && this->Origin are valid Instructions. - /// \return The last instruction of the sequence used to load the slice. - Instruction *loadSlice(InstCombiner::BuilderTy &Builder, - bool IsBigEndian) const { - assert(Inst && Origin && "Unable to replace a non-existing slice."); - Value *BaseAddr = Origin->getOperand(0); - unsigned Alignment = Origin->getAlignment(); - Builder.SetInsertPoint(Origin); - // Assume we are looking at a chunk of bytes. - // BaseAddr = (i8*)BaseAddr. - BaseAddr = Builder.CreateBitCast(BaseAddr, Builder.getInt8PtrTy(), - "raw_cast"); - // Get the offset in that chunk of bytes w.r.t. the endianess. - uint64_t Offset = getOffsetFromBase(IsBigEndian); - if (Offset) { - APInt APOffset(64, Offset); - // BaseAddr = BaseAddr + Offset. - BaseAddr = Builder.CreateInBoundsGEP(BaseAddr, Builder.getInt(APOffset), - "raw_idx"); - } - - // Create the type of the loaded slice according to its size. - Type *SliceType = - Type::getIntNTy(Origin->getContext(), getLoadedSize() * 8); - - // Bit cast the raw pointer to the pointer type of the slice. - BaseAddr = Builder.CreateBitCast(BaseAddr, SliceType->getPointerTo(), - "cast"); - - // Compute the new alignment. - if (Offset != 0) - Alignment = MinAlign(Alignment, Alignment + Offset); - - // Create the load for the slice. - Instruction *LastInst = Builder.CreateAlignedLoad(BaseAddr, Alignment, - Inst->getName()+".val"); - // If the final type is not the same as the loaded type, this means that - // we have to pad with zero. 
Create a zero extend for that. - Type * FinalType = Inst->getType(); - if (SliceType != FinalType) - LastInst = cast(Builder.CreateZExt(LastInst, FinalType)); - - // Update the IR to reflect the new access to the slice. - Inst->replaceAllUsesWith(LastInst); - - return LastInst; - } - - /// \brief Check if it would be profitable to expand this slice as an - /// independant load. - bool isProfitable() const { - // Slicing is assumed to be profitable iff the chains leads to arithmetic - // operations. - SmallVector Uses; - Uses.push_back(Inst); - do { - const Instruction *Use = Uses.pop_back_val(); - for (Value::const_use_iterator UseIt = Use->use_begin(), - UseItEnd = Use->use_end(); UseIt != UseItEnd; ++UseIt) { - const Instruction *UseOfUse = cast(*UseIt); - // Consider these instructions as arithmetic operations. - if (isa(UseOfUse) || - isa(UseOfUse) || - isa(UseOfUse) || - isa(UseOfUse)) - return true; - // No need to check if the Use has already been checked as we do not - // insert any PHINode. - Uses.push_back(UseOfUse); - } - } while (!Uses.empty()); - DEBUG(dbgs() << "IC: Not a profitable slice " << *Inst << '\n'); - return false; - } - }; -} - -/// \brief Check the profitability of all involved LoadedSlice. -/// Unless StressLoadSlicing is specified, this also returns false -/// when slicing is not in the canonical form. -/// The canonical form of sliced load is (1) two loads, -/// which are (2) next to each other in memory. -/// -/// FIXME: We may want to allow more slices to be created but -/// this means other passes should know how to deal with all those -/// slices. -/// FIXME: We may want to split loads to different types, e.g., -/// int vs. float. -static bool -isSlicingProfitable(const SmallVectorImpl &LoadedSlices, - const APInt &UsedBits) { - unsigned NbOfSlices = LoadedSlices.size(); - // Check (1). - if (!StressLoadSlicing && NbOfSlices != 2) - return false; - - // Check (2). - if (!StressLoadSlicing && !UsedBits.isAllOnesValue()) { - // Get rid of the unused bits on the right. - APInt MemoryLayout = UsedBits.lshr(UsedBits.countTrailingZeros()); - // Get rid of the unused bits on the left. - if (MemoryLayout.countLeadingZeros()) - MemoryLayout = MemoryLayout.trunc(MemoryLayout.getActiveBits()); - // Check that the chunk of memory is completely used. - if (!MemoryLayout.isAllOnesValue()) - return false; - } - - unsigned NbOfProfitableSlices = 0; - for (unsigned CurrSlice = 0; CurrSlice < NbOfSlices; ++CurrSlice) { - if (LoadedSlices[CurrSlice].isProfitable()) - ++NbOfProfitableSlices; - else if (!StressLoadSlicing) - return false; - } - // In Stress mode, we may have 0 profitable slice. - // Check that here. - // In non-Stress mode, all the slices are profitable at this point. - return NbOfProfitableSlices > 0; -} - -/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) -/// operations, split it in the various pieces being extracted. -/// -/// This sort of thing is introduced by SROA. -/// This slicing takes care not to insert overlapping loads. -/// \pre LI is a simple load (i.e., not an atomic or volatile load). -static Instruction *sliceUpLoadInst(LoadInst &LI, - InstCombiner::BuilderTy &Builder, - DataLayout &TD) { - assert(LI.isSimple() && "We are trying to transform a non-simple load!"); - - // FIXME: If we want to support floating point and vector types, we should - // support bitcast and extract/insert element instructions. 
- Type *LITy = LI.getType(); - if (!LITy->isIntegerTy()) return 0; - - // Keep track of already used bits to detect overlapping values. - // In that case, we will just abort the transformation. - APInt UsedBits(LITy->getPrimitiveSizeInBits(), 0); - - SmallVector LoadedSlices; - - // Check if this load is used as several smaller chunks of bits. - // Basically, look for uses in trunc or trunc(lshr) and record a new chain - // of computation for each trunc. - for (Value::use_iterator UI = LI.use_begin(), UIEnd = LI.use_end(); - UI != UIEnd; ++UI) { - Instruction *User = cast(*UI); - unsigned Shift = 0; - - // Check if this is a trunc(lshr). - if (User->getOpcode() == Instruction::LShr && User->hasOneUse() && - isa(User->getOperand(1))) { - Shift = cast(User->getOperand(1))->getZExtValue(); - User = User->use_back(); - } - - // At this point, User is a TruncInst, iff we encountered, trunc or - // trunc(lshr). - if (!isa(User)) - return 0; - - // The width of the type must be a power of 2 and greater than 8-bits. - // Otherwise the load cannot be represented in LLVM IR. - // Moreover, if we shifted with a non 8-bits multiple, the slice - // will be accross several bytes. We do not support that. - unsigned Width = User->getType()->getPrimitiveSizeInBits(); - if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) - return 0; - - // Build the slice for this chain of computations. - LoadedSlice LS(User, &LI, Shift); - APInt CurrentUsedBits = LS.getUsedBits(); - - // Check if this slice overlaps with another. - if ((CurrentUsedBits & UsedBits) != 0) - return 0; - // Update the bits used globally. - UsedBits |= CurrentUsedBits; - - // Record the slice. - LoadedSlices.push_back(LS); - } - - // Abort slicing if it does not seem to be profitable. - if (!isSlicingProfitable(LoadedSlices, UsedBits)) - return 0; - - // Rewrite each chain to use an independent load. - // By construction, each chain can be represented by a unique load. - bool IsBigEndian = TD.isBigEndian(); - for (SmallVectorImpl::const_iterator LSIt = LoadedSlices.begin(), - LSItEnd = LoadedSlices.end(); LSIt != LSItEnd; ++LSIt) { - Instruction *SliceInst = LSIt->loadSlice(Builder, IsBigEndian); - (void)SliceInst; - DEBUG(dbgs() << "IC: Replacing " << *LSIt->Inst << "\n" - " with " << *SliceInst << '\n'); - } - return 0; // Don't do anything with LI. -} - Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); @@ -721,13 +443,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } } - - // Try to split a load in smaller non-overlapping loads to expose independant - // chain of computations and get rid of trunc/lshr sequence of code. - // The data layout is required for that operation, as code generation will - // change with respect to endianess. 
- if (TD) - return sliceUpLoadInst(LI, *Builder, *TD); return 0; } -- cgit v1.1 From 0b956507cab3dc4b06b310d2674bb35c79f46dc0 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 18 Sep 2013 00:11:27 +0000 Subject: ifndef NDEBUG-out an asserts-only constant committed in r190863 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190905 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 75ecc94..2ee0f9d 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -93,7 +93,9 @@ static const int kAsanStackLeftRedzoneMagic = 0xf1; static const int kAsanStackMidRedzoneMagic = 0xf2; static const int kAsanStackRightRedzoneMagic = 0xf3; static const int kAsanStackPartialRedzoneMagic = 0xf4; +#ifndef NDEBUG static const int kAsanStackAfterReturnMagic = 0xf5; +#endif // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; -- cgit v1.1 From 4acd20a20be9f7d91ed35c1c6a501cec1605e854 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 03:55:53 +0000 Subject: Lift alignment restrictions for load/store folding on VINSERTF128/VEXTRACTF128. Fixes PR17268. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190916 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b287ca7..c2427e7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -51,7 +51,7 @@ static cl::opt "number ")); namespace { -static const unsigned MinVecRegSize = 128; +static const unsigned MinVecRegSize = 256; static const unsigned RecursionMaxDepth = 12; -- cgit v1.1 From b8f54d86f28f84103a5e8dff5d3f3a3b493aaaa7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 18 Sep 2013 04:10:17 +0000 Subject: Revert accidental commit I had to make to get the test case in PR17268 to still work correctly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190917 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c2427e7..b287ca7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -51,7 +51,7 @@ static cl::opt "number ")); namespace { -static const unsigned MinVecRegSize = 256; +static const unsigned MinVecRegSize = 128; static const unsigned RecursionMaxDepth = 12; -- cgit v1.1 From d072d1b2a36f6c6e1b4b513dc69e91b61ad8c21a Mon Sep 17 00:00:00 2001 From: Robert Lytton Date: Wed, 18 Sep 2013 12:43:35 +0000 Subject: Prevent LoopVectorizer and SLPVectorizer running if the target has no vector registers. XCore target: Add XCoreTargetTransformInfo This is where getNumberOfRegisters() resides, which in turn returns the number of vector registers (=0). 
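As a rough sketch (not part of the patch; the class name and the register
counts are assumed for illustration, only the TTI hook itself is real),
the override amounts to:

    // Reporting zero vector registers makes both the loop vectorizer and
    // the SLP vectorizer bail out before doing any work.
    unsigned XCoreTTI::getNumberOfRegisters(bool Vector) const {
      if (Vector)
        return 0;  // no vector registers, so vectorization is skipped
      return 12;   // illustrative scalar register count only
    }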
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190936 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++++ lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 1d82c7b..30908c8 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -909,6 +909,11 @@ struct LoopVectorize : public LoopPass { DT = &getAnalysis(); TLI = getAnalysisIfAvailable(); + // If the target claims to have no vector registers don't attempt + // vectorization. + if (!TTI->getNumberOfRegisters(true)) + return false; + if (DL == NULL) { DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); return false; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b287ca7..cd3f723 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1572,6 +1572,11 @@ struct SLPVectorizer : public FunctionPass { StoreRefs.clear(); bool Changed = false; + // If the target claims to have no vector registers don't attempt + // vectorization. + if (!TTI->getNumberOfRegisters(true)) + return false; + // Must have DataLayout. We can't require it because some tests run w/o // triple. if (!DL) -- cgit v1.1 From ac04abaf5a1df4c4bf48367cfbb41600289c4d78 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Wed, 18 Sep 2013 14:07:14 +0000 Subject: [asan] call __asan_stack_malloc_N only if use-after-return detection is enabled with the run-time option git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190939 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 23 +++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 2ee0f9d..8f8af20 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -88,6 +88,9 @@ static const char *const kAsanPoisonStackMemoryName = static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; +static const char *const kAsanOptionDetectUAR = + "__asan_option_detect_stack_use_after_return"; + // These constants must match the definitions in the run-time library. 
static const int kAsanStackLeftRedzoneMagic = 0xf1; static const int kAsanStackMidRedzoneMagic = 0xf2; @@ -1409,10 +1412,28 @@ void FunctionStackPoisoner::poisonStack() { Value *LocalStackBase = OrigStackBase; if (DoStackMalloc) { + // LocalStackBase = OrigStackBase + // if (__asan_option_detect_stack_use_after_return) + // LocalStackBase = __asan_stack_malloc_N(LocalStackBase, OrigStackBase); StackMallocIdx = StackMallocSizeClass(LocalStackSize); assert(StackMallocIdx <= kMaxAsanStackMallocSizeClass); - LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc[StackMallocIdx], + Constant *OptionDetectUAR = F.getParent()->getOrInsertGlobal( + kAsanOptionDetectUAR, IRB.getInt32Ty()); + Value *Cmp = IRB.CreateICmpNE(IRB.CreateLoad(OptionDetectUAR), + Constant::getNullValue(IRB.getInt32Ty())); + Instruction *Term = + SplitBlockAndInsertIfThen(cast(Cmp), false); + BasicBlock *CmpBlock = cast(Cmp)->getParent(); + IRBuilder<> IRBIf(Term); + LocalStackBase = IRBIf.CreateCall2( + AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); + BasicBlock *SetBlock = cast(LocalStackBase)->getParent(); + IRB.SetInsertPoint(InsBefore); + PHINode *Phi = IRB.CreatePHI(IntptrTy, 2); + Phi->addIncoming(OrigStackBase, CmpBlock); + Phi->addIncoming(LocalStackBase, SetBlock); + LocalStackBase = Phi; } // This string will be parsed by the run-time (DescribeAddressIfStack). -- cgit v1.1 From 6591308b7e041bb8e5e211f84bcc4a97d3764cc5 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Thu, 19 Sep 2013 15:22:35 +0000 Subject: [msan] Wrap indirect functions. Adds a flag to the MemorySanitizer pass that enables runtime rewriting of indirect calls. This is part of MSanDR implementation and is needed to return control to the DynamiRio-based helper tool on transition between instrumented and non-instrumented modules. Disabled by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 50 +++++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 9a67cfc..cab7a7a 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -157,6 +157,14 @@ static cl::opt ClBlacklistFile("msan-blacklist", cl::desc("File containing the list of functions where MemorySanitizer " "should not report bugs"), cl::Hidden); +// Experimental. Wraps all indirect calls in the instrumented code with +// a call to the given function. This is needed to assist the dynamic +// helper tool (MSanDR) to regain control on transition between instrumented and +// non-instrumented code. +static cl::opt ClWrapIndirectCalls("msan-wrap-indirect-calls", + cl::desc("Wrap indirect calls with a given function"), + cl::Hidden); + namespace { /// \brief An instrumentation pass implementing detection of uninitialized @@ -168,12 +176,12 @@ class MemorySanitizer : public FunctionPass { public: MemorySanitizer(bool TrackOrigins = false, StringRef BlacklistFile = StringRef()) - : FunctionPass(ID), - TrackOrigins(TrackOrigins || ClTrackOrigins), - TD(0), - WarningFn(0), - BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile - : BlacklistFile) { } + : FunctionPass(ID), + TrackOrigins(TrackOrigins || ClTrackOrigins), + TD(0), + WarningFn(0), + BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile : BlacklistFile),
+        WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
   const char *getPassName() const { return "MemorySanitizer"; }
   bool runOnFunction(Function &F);
   bool doInitialization(Module &M);
@@ -236,6 +244,12 @@ class MemorySanitizer : public FunctionPass {
   /// \brief An empty volatile inline asm that prevents callback merge.
   InlineAsm *EmptyAsm;
 
+  bool WrapIndirectCalls;
+  /// \brief Run-time wrapper for indirect calls.
+  Value *IndirectCallWrapperFn;
+  // Argument and return type of IndirectCallWrapperFn: void (*f)(void).
+  Type *AnyFunctionPtrTy;
+
   friend struct MemorySanitizerVisitor;
   friend struct VarArgAMD64Helper;
 };
@@ -329,6 +343,13 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                             StringRef(""), StringRef(""),
                             /*hasSideEffects=*/true);
+
+  if (WrapIndirectCalls) {
+    AnyFunctionPtrTy =
+        PointerType::getUnqual(FunctionType::get(IRB.getVoidTy(), false));
+    IndirectCallWrapperFn = M.getOrInsertFunction(
+        ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
+  }
 }
 
 /// \brief Module-level initialization.
@@ -1570,6 +1591,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
   }
 
+  // Replace call to (*Fn) with a call to (*IndirectCallWrapperFn(Fn)).
+  void wrapIndirectCall(IRBuilder<> &IRB, CallSite CS) {
+    Value *Fn = CS.getCalledValue();
+    Value *NewFn = IRB.CreateBitCast(
+        IRB.CreateCall(MS.IndirectCallWrapperFn,
+                       IRB.CreateBitCast(Fn, MS.AnyFunctionPtrTy)),
+        Fn->getType());
+    setShadow(NewFn, getShadow(Fn));
+    CS.setCalledFunction(NewFn);
+  }
+
   void visitCallSite(CallSite CS) {
     Instruction &I = *CS.getInstruction();
     assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
@@ -1608,6 +1640,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       }
     }
     IRBuilder<> IRB(&I);
+
+    if (MS.WrapIndirectCalls && !CS.getCalledFunction())
+      wrapIndirectCall(IRB, CS);
+
     unsigned ArgOffset = 0;
     DEBUG(dbgs() << "  CallSite: " << I << "\n");
     for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
@@ -1651,7 +1687,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     DEBUG(dbgs() << "  done with call args\n");
 
     FunctionType *FT =
-      cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+        cast<FunctionType>(CS.getCalledValue()->getType()->getContainedType(0));
     if (FT->isVarArg()) {
       VAHelper->visitCallSite(CS, IRB);
     }
-- 
cgit v1.1


From 1bc7315c022b327a496366a78eb31ba446c699bd Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Thu, 19 Sep 2013 17:22:51 +0000
Subject: GVN proceeds in the presence of dead code.

This is how it ignores the dead code:

 1) When a dead branch target, say block B, is identified, all the
    blocks dominated by B are dead as well.

 2) The PHIs of those blocks in dominance-frontier(B) are updated such
    that the operands corresponding to dead predecessors are replaced
    by "UndefVal". In lattice jargon, the "UndefVal" is essentially the
    "Top". A phi node such as "phi(v1 bb1, undef xx)" will be optimized
    into "v1" if v1 is a constant, or if v1 is an instruction that
    dominates this PHI node.

 3) When analyzing the availability of a load L, all dead mem-ops which
    L depends on disguise themselves as loads that evaluate exactly the
    same value as L.

 4) The dead mem-ops will be materialized as "UndefVal" during code
    motion.
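As a source-level sketch (hypothetical example, not from the patch) of
steps 1) and 2):

    // 'cond' folds to a constant, so one branch target becomes a dead
    // root; every block it dominates is dead (step 1), and the operand
    // the merge-point PHI receives from the dead region is replaced by
    // UndefVal (step 2), after which phi(undef, v) simplifies to v.
    int example(int v) {
      const bool cond = false;  // foldable branch condition
      int r;
      if (cond)
        r = 7;                  // dead region
      else
        r = v;                  // live region
      return r;                 // phi(7, v) -> phi(undef, v) -> v
    }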
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 170 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index bc418af..2e4d428 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" @@ -507,7 +508,9 @@ namespace { enum ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. + MemIntrin, // A memory intrinsic which is loaded from. + UndefVal // A UndefValue representing a value from dead block (which + // is not yet physically removed from the CFG). }; /// V - The value that is live out of the block. @@ -545,10 +548,20 @@ namespace { Res.Offset = Offset; return Res; } - + + static AvailableValueInBlock getUndef(BasicBlock *BB) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(0); + Res.Val.setInt(UndefVal); + Res.Offset = 0; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -576,6 +589,7 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; + SetVector DeadBlocks; ValueTable VN; @@ -698,6 +712,9 @@ namespace { unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool processFoldableCondBr(BranchInst *BI); + void addDeadBlock(BasicBlock *BB); + void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1253,8 +1270,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) + LI->getParent())) { + assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + } // Otherwise, we have to construct SSA form. 
SmallVector NewPHIs; @@ -1324,7 +1343,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else { + } else if (isMemIntrinValue()) { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1332,6 +1351,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); + } else { + assert(isUndefValue() && "Should be UndefVal"); + DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); + return UndefValue::get(LoadTy); } return Res; } @@ -1355,6 +1378,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (DeadBlocks.count(DepBB)) { + // Dead dependent mem-op disguise as a load evaluating the same value + // as the load in question. + ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); + continue; + } + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -2191,11 +2221,13 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - if (!BI->isConditional() || isa(BI->getCondition())) + if (!BI->isConditional()) return false; - Value *BranchCond = BI->getCondition(); + if (isa(BI->getCondition())) + return processFoldableCondBr(BI); + Value *BranchCond = BI->getCondition(); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. @@ -2312,6 +2344,9 @@ bool GVN::runOnFunction(Function& F) { } if (EnablePRE) { + // Fabricate val-num for dead-code in order to suppress assertion in + // performPRE(). + assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2325,6 +2360,9 @@ bool GVN::runOnFunction(Function& F) { // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); + // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each + // iteration. + DeadBlocks.clear(); return Changed; } @@ -2335,6 +2373,9 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); + if (DeadBlocks.count(BB)) + return false; + bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2628,3 +2669,120 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } + +// BB is declared dead, which implied other blocks become dead as well. This +// function is to add all these blocks to "DeadBlocks". For the dead blocks' +// live successors, update their phi nodes by replacing the operands +// corresponding to dead blocks with UndefVal. +// +void GVN::addDeadBlock(BasicBlock *BB) { + SmallVector NewDead; + SmallSetVector DF; + + NewDead.push_back(BB); + while (!NewDead.empty()) { + BasicBlock *D = NewDead.pop_back_val(); + if (DeadBlocks.count(D)) + continue; + + // All blocks dominated by D are dead. 
+ SmallVector Dom; + DT->getDescendants(D, Dom); + DeadBlocks.insert(Dom.begin(), Dom.end()); + + // Figure out the dominance-frontier(D). + for (SmallVectorImpl::iterator I = Dom.begin(), + E = Dom.end(); I != E; I++) { + BasicBlock *B = *I; + for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { + BasicBlock *S = *SI; + if (DeadBlocks.count(S)) + continue; + + bool AllPredDead = true; + for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) + if (!DeadBlocks.count(*PI)) { + AllPredDead = false; + break; + } + + if (!AllPredDead) { + // S could be proved dead later on. That is why we don't update phi + // operands at this moment. + DF.insert(S); + } else { + // While S is not dominated by D, it is dead by now. This could take + // place if S already have a dead predecessor before D is declared + // dead. + NewDead.push_back(S); + } + } + } + } + + // For the dead blocks' live successors, update their phi nodes by replacing + // the operands corresponding to dead blocks with UndefVal. + for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); + I != E; I++) { + BasicBlock *B = *I; + if (DeadBlocks.count(B)) + continue; + + for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { + BasicBlock *P = *PI; + if (!DeadBlocks.count(P)) + continue; + for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { + PHINode &Phi = cast(*II); + Phi.setIncomingValue(Phi.getBasicBlockIndex(P), + UndefValue::get(Phi.getType())); + } + } + } +} + +// If the given branch is recognized as a foldable branch (i.e. conditional +// branch with constant condition), it will perform following analyses and +// transformation. +// 1) If the dead out-coming edge is a critical-edge, split it. Let +// R be the target of the dead out-coming edge. +// 1) Identify the set of dead blocks implied by the branch's dead outcoming +// edge. The result of this step will be {X| X is dominated by R} +// 2) Identify those blocks which haves at least one dead prodecessor. The +// result of this step will be dominance-frontier(R). +// 3) Update the PHIs in DF(R) by replacing the operands corresponding to +// dead blocks with "UndefVal" in an hope these PHIs will optimized away. +// +// Return true iff *NEW* dead code are found. +bool GVN::processFoldableCondBr(BranchInst *BI) { + if (!BI || BI->isUnconditional()) + return false; + + ConstantInt *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return false; + + BasicBlock *DeadRoot = Cond->getZExtValue() ? + BI->getSuccessor(1) : BI->getSuccessor(0); + if (DeadBlocks.count(DeadRoot)) + return false; + + if (!DeadRoot->getSinglePredecessor()) + DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); + + addDeadBlock(DeadRoot); + return true; +} + +// performPRE() will trigger assert if it come across an instruciton without +// associated val-num. As it normally has far more live instructions than dead +// instructions, it makes more sense just to "fabricate" a val-number for the +// dead code than checking if instruction involved is dead or not. +void GVN::assignValNumForDeadCode() { + for (SetVector::iterator I = DeadBlocks.begin(), + E = DeadBlocks.end(); I != E; I++) { + for (BasicBlock::iterator II = (*I)->begin(), EE = (*I)->end(); + II != EE; II++) + VN.lookup_or_add(&*II); + } +} -- cgit v1.1 From bf22298093d7069fbf3a3149d26fbcb4ad453ceb Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 19 Sep 2013 20:59:04 +0000 Subject: InstCombine: Don't allow turning vector-of-pointer loads into vector-of-integer. 
The code below can't handle any pointers. PR17293.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191036 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 88e16e9..0a0727e 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -318,7 +318,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
-        (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
+        (SrcPTy->isPtrOrPtrVectorTy() ==
+         LI.getType()->isPtrOrPtrVectorTy()) &&
         IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
              IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {
-- 
cgit v1.1


From b1ccfb3a548e122e282cd62c534c4d47f5310bf6 Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Thu, 19 Sep 2013 21:13:46 +0000
Subject: [Fast-math] Disable "(C1/X)*C2 => (C1*C2)/X" if C1/X has multiple
 uses.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If "C1/X" has multiple uses, the only benefit of this transformation is
to potentially shorten the critical path. But that comes at the cost of
introducing an additional div.

The additional div may or may not incur a cost depending on how div is
implemented. If it is implemented using Newton–Raphson iteration, it
doesn't seem to incur any cost (FIXME). However, if the div blocks the
entire pipeline, that sounds pretty expensive. Let CodeGen take care of
this transformation.

This patch sees a 6% improvement on a benchmark.

rdar://15032743

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191037 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cc6a301..9c310f0 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -374,9 +374,12 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
   } else {
     if (C0) {
       // (C0 / X) * C => (C0 * C) / X
-      ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
-      if (isNormalFp(F))
-        R = BinaryOperator::CreateFDiv(F, Opnd1);
+      if (FMulOrDiv->hasOneUse()) {
+        // It would otherwise introduce another div.
+        ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C));
+        if (isNormalFp(F))
+          R = BinaryOperator::CreateFDiv(F, Opnd1);
+      }
     } else {
       // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal
       ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1));
-- 
cgit v1.1


From 4c332fa5a5a650cd779935d78b505f84cef4db69 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Fri, 20 Sep 2013 14:38:44 +0000
Subject: InstCombine: Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub
 (ptrtoint X), (ptrtoint Y))

The GEP pattern is what the SCEV expander emits for "ugly geps". The
latter is what you get for pointer subtraction in C code. The rest of
instcombine already knows how to deal with that so just canonicalize
on that.
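As a sketch of where the pattern comes from (hypothetical example; the
function name is made up):

    // Pointer subtraction such as this is what ends up as an "ugly gep"
    // once the SCEV expander has rewritten the pointer arithmetic; after
    // this change the gep form becomes inttoptr(sub(ptrtoint X,
    // ptrtoint Y)), which the existing subtraction folds understand.
    long byteDistance(char *X, char *Y) {
      return X - Y;
    }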
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 803c727..fcb26ab 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1182,6 +1182,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName()); } + // Canonicalize (gep i8* X, -(ptrtoint Y)) to (sub (ptrtoint X), (ptrtoint Y)) + // The GEP pattern is emitted by the SCEV expander for certain kinds of + // pointer arithmetic. + if (TD && GEP.getNumIndices() == 1 && + match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value()))) && + GEP.getType() == Builder->getInt8PtrTy() && + GEP.getOperand(1)->getType()->getScalarSizeInBits() == + TD->getPointerSizeInBits(GEP.getPointerAddressSpace())) { + Operator *Index = cast(GEP.getOperand(1)); + Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + } + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = dyn_cast(StrippedPtr->getType()); -- cgit v1.1 From fc572d87d2bbf39732e43fc761b5173ef0bcc0f7 Mon Sep 17 00:00:00 2001 From: Joerg Sonnenberger Date: Fri, 20 Sep 2013 20:33:57 +0000 Subject: Revert r191017, it results in segmentation faults in Qt. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 170 ++---------------------------------------- 1 file changed, 6 insertions(+), 164 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 2e4d428..bc418af 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" @@ -508,9 +507,7 @@ namespace { enum ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. - MemIntrin, // A memory intrinsic which is loaded from. - UndefVal // A UndefValue representing a value from dead block (which - // is not yet physically removed from the CFG). + MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. 
@@ -548,20 +545,10 @@ namespace { Res.Offset = Offset; return Res; } - - static AvailableValueInBlock getUndef(BasicBlock *BB) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(0); - Res.Val.setInt(UndefVal); - Res.Offset = 0; - return Res; - } - + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -589,7 +576,6 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; - SetVector DeadBlocks; ValueTable VN; @@ -712,9 +698,6 @@ namespace { unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); - bool processFoldableCondBr(BranchInst *BI); - void addDeadBlock(BasicBlock *BB); - void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1270,10 +1253,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) { - assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); + LI->getParent())) return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); - } // Otherwise, we have to construct SSA form. SmallVector NewPHIs; @@ -1343,7 +1324,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else if (isMemIntrinValue()) { + } else { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1351,10 +1332,6 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); - } else { - assert(isUndefValue() && "Should be UndefVal"); - DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); - return UndefValue::get(LoadTy); } return Res; } @@ -1378,13 +1355,6 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); - if (DeadBlocks.count(DepBB)) { - // Dead dependent mem-op disguise as a load evaluating the same value - // as the load in question. - ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); - continue; - } - if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -2221,13 +2191,11 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - if (!BI->isConditional()) + if (!BI->isConditional() || isa(BI->getCondition())) return false; - if (isa(BI->getCondition())) - return processFoldableCondBr(BI); - Value *BranchCond = BI->getCondition(); + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. @@ -2344,9 +2312,6 @@ bool GVN::runOnFunction(Function& F) { } if (EnablePRE) { - // Fabricate val-num for dead-code in order to suppress assertion in - // performPRE(). 
- assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2360,9 +2325,6 @@ bool GVN::runOnFunction(Function& F) { // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); - // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each - // iteration. - DeadBlocks.clear(); return Changed; } @@ -2373,9 +2335,6 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); - if (DeadBlocks.count(BB)) - return false; - bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2669,120 +2628,3 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } - -// BB is declared dead, which implied other blocks become dead as well. This -// function is to add all these blocks to "DeadBlocks". For the dead blocks' -// live successors, update their phi nodes by replacing the operands -// corresponding to dead blocks with UndefVal. -// -void GVN::addDeadBlock(BasicBlock *BB) { - SmallVector NewDead; - SmallSetVector DF; - - NewDead.push_back(BB); - while (!NewDead.empty()) { - BasicBlock *D = NewDead.pop_back_val(); - if (DeadBlocks.count(D)) - continue; - - // All blocks dominated by D are dead. - SmallVector Dom; - DT->getDescendants(D, Dom); - DeadBlocks.insert(Dom.begin(), Dom.end()); - - // Figure out the dominance-frontier(D). - for (SmallVectorImpl::iterator I = Dom.begin(), - E = Dom.end(); I != E; I++) { - BasicBlock *B = *I; - for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { - BasicBlock *S = *SI; - if (DeadBlocks.count(S)) - continue; - - bool AllPredDead = true; - for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) - if (!DeadBlocks.count(*PI)) { - AllPredDead = false; - break; - } - - if (!AllPredDead) { - // S could be proved dead later on. That is why we don't update phi - // operands at this moment. - DF.insert(S); - } else { - // While S is not dominated by D, it is dead by now. This could take - // place if S already have a dead predecessor before D is declared - // dead. - NewDead.push_back(S); - } - } - } - } - - // For the dead blocks' live successors, update their phi nodes by replacing - // the operands corresponding to dead blocks with UndefVal. - for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); - I != E; I++) { - BasicBlock *B = *I; - if (DeadBlocks.count(B)) - continue; - - for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { - BasicBlock *P = *PI; - if (!DeadBlocks.count(P)) - continue; - for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { - PHINode &Phi = cast(*II); - Phi.setIncomingValue(Phi.getBasicBlockIndex(P), - UndefValue::get(Phi.getType())); - } - } - } -} - -// If the given branch is recognized as a foldable branch (i.e. conditional -// branch with constant condition), it will perform following analyses and -// transformation. -// 1) If the dead out-coming edge is a critical-edge, split it. Let -// R be the target of the dead out-coming edge. -// 1) Identify the set of dead blocks implied by the branch's dead outcoming -// edge. The result of this step will be {X| X is dominated by R} -// 2) Identify those blocks which haves at least one dead prodecessor. The -// result of this step will be dominance-frontier(R). 
-// 3) Update the PHIs in DF(R) by replacing the operands corresponding to -// dead blocks with "UndefVal" in an hope these PHIs will optimized away. -// -// Return true iff *NEW* dead code are found. -bool GVN::processFoldableCondBr(BranchInst *BI) { - if (!BI || BI->isUnconditional()) - return false; - - ConstantInt *Cond = dyn_cast(BI->getCondition()); - if (!Cond) - return false; - - BasicBlock *DeadRoot = Cond->getZExtValue() ? - BI->getSuccessor(1) : BI->getSuccessor(0); - if (DeadBlocks.count(DeadRoot)) - return false; - - if (!DeadRoot->getSinglePredecessor()) - DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); - - addDeadBlock(DeadRoot); - return true; -} - -// performPRE() will trigger assert if it come across an instruciton without -// associated val-num. As it normally has far more live instructions than dead -// instructions, it makes more sense just to "fabricate" a val-number for the -// dead code than checking if instruction involved is dead or not. -void GVN::assignValNumForDeadCode() { - for (SetVector::iterator I = DeadBlocks.begin(), - E = DeadBlocks.end(); I != E; I++) { - for (BasicBlock::iterator II = (*I)->begin(), EE = (*I)->end(); - II != EE; II++) - VN.lookup_or_add(&*II); - } -} -- cgit v1.1 From 074e489dbc0779465cd71a2e10643c156ae5d6d7 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 20 Sep 2013 21:18:20 +0000 Subject: SLPVectorizer: Handle more horizontal reductions (disabled) Match reductions starting at binary operation feeding into a phi. The code handles trees like r += v1 + v2 + v3 ... and r += v1 r += v2 ... and r *= v1 + v2 + ... We currently only handle associative operations (add, fadd fast). The code can now also handle reductions feeding into stores. a[i] = v1 + v2 + v3 + ... The code is currently disabled behind the flag "-slp-vectorize-hor". The cost model for most architectures is not there yet. I found one opportunity of a horizontal reduction feeding a phi in TSVC (LoopRerolling-flt) and there are several opportunities where reductions feed into stores. radar://14607682 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191108 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 376 ++++++++++++++++++++++++++++- 1 file changed, 368 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index cd3f723..caedd09 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -49,6 +49,11 @@ static cl::opt SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")); + +static cl::opt +ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, + cl::desc("Attempt to vectorize horizontal reductions")); + namespace { static const unsigned MinVecRegSize = 128; @@ -238,17 +243,21 @@ public: } /// \brief Vectorize the tree that starts with the elements in \p VL. - void vectorizeTree(); + /// Returns the vectorized root and the scalar operations the root was based + /// on. + std::pair vectorizeTree(); /// \returns the vectorization cost of the subtree that starts at \p VL. /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots. - void buildTree(ArrayRef Roots); + /// Construct a vectorizable tree that starts at \p Roots and is possibly + /// used by a reduction of \p RdxOps. 
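/// For example, in r += v1 + v2 + v3 + v4 the Roots are the leaves v1..v4
/// and RdxOps holds the '+' chain; uses of the leaves by those reduction
/// operations must not be treated as external users needing extractelements.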
+ void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { + RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -401,6 +410,9 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; + /// Reduction operators. + ValueSet *RdxOps; + // Analysis and block reference. Function *F; ScalarEvolution *SE; @@ -413,8 +425,9 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots) { +void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { deleteTree(); + RdxOps = Rdx; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -445,8 +458,12 @@ void BoUpSLP::buildTree(ArrayRef Roots) { assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); continue; } + Instruction *UserInst = dyn_cast(*User); + if (!UserInst) + continue; - if (!isa(*User)) + // Ignore uses that are part of the reduction. + if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " << @@ -578,6 +595,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } + // This user is part of the reduction. + if (RdxOps && RdxOps->count(User)) + continue; + // Make sure that we can schedule this unknown user. BlockNumbering &BN = BlocksNumbers[BB]; int UserIndex = BN.getIndex(User); @@ -1372,7 +1393,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return 0; } -void BoUpSLP::vectorizeTree() { +std::pair BoUpSLP::vectorizeTree() { Builder.SetInsertPoint(F->getEntryBlock().begin()); vectorizeTree(&VectorizableTree[0]); @@ -1449,7 +1470,10 @@ void BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); assert(!MustGather.count(*User) && "Replacing gathered value with undef"); - assert(ScalarToTreeEntry.count(*User) && + + assert((ScalarToTreeEntry.count(*User) || + // It is legal to replace the reduction users by undef. + (RdxOps && RdxOps->count(*User))) && "Replacing out-of-tree value with undef"); } Value *Undef = UndefValue::get(Ty); @@ -1464,6 +1488,9 @@ void BoUpSLP::vectorizeTree() { BlocksNumbers[it].forget(); } Builder.ClearInsertionPoint(); + + return std::make_pair(VectorizableTree[0].VectorizedValue, + &VectorizableTree[0].Scalars); } void BoUpSLP::optimizeGatherSequence() { @@ -1887,6 +1914,310 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { return 0; } +/// \brief Generate a shuffle mask to be used in a reduction tree. +/// +/// \param VecLen The length of the vector to be reduced. +/// \param NumEltsToRdx The number of elements that should be reduced in the +/// vector. +/// \param IsPairwise Whether the reduction is a pairwise or splitting +/// reduction. A pairwise reduction will generate a mask of +/// <0,2,...> or <1,3,..> while a splitting reduction will generate +/// <2,3, undef,undef> for a vector of 4 and NumElts = 2. +/// \param IsLeft True will generate a mask of even elements, odd otherwise. +static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, + bool IsPairwise, bool IsLeft, + IRBuilder<> &Builder) { + assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); + + SmallVector ShuffleMask( + VecLen, UndefValue::get(Builder.getInt32Ty())); + + if (IsPairwise) + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). 
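  // E.g. VecLen = 4 and NumEltsToRdx = 2 gives <0, 2, u, u> (left) or
  // <1, 3, u, u> (right); the splitting branch below gives <2, 3, u, u>.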
+ for (unsigned i = 0; i != NumEltsToRdx; ++i) + ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); + else + // Move the upper half of the vector to the lower half. + for (unsigned i = 0; i != NumEltsToRdx; ++i) + ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); + + return ConstantVector::get(ShuffleMask); +} + + +/// Model horizontal reductions. +/// +/// A horizontal reduction is a tree of reduction operations (currently add and +/// fadd) that has operations that can be put into a vector as its leaf. +/// For example, this tree: +/// +/// mul mul mul mul +/// \ / \ / +/// + + +/// \ / +/// + +/// This tree has "mul" as its reduced values and "+" as its reduction +/// operations. A reduction might be feeding into a store or a binary operation +/// feeding a phi. +/// ... +/// \ / +/// + +/// \ +/// phi += +/// +/// Or: +/// ... +/// \ / +/// + +/// \ +/// *p = +/// +class HorizontalReduction { + SmallPtrSet ReductionOps; + SmallVector ReducedVals; + + BinaryOperator *ReductionRoot; + PHINode *ReductionPHI; + + /// The opcode of the reduction. + unsigned ReductionOpcode; + /// The opcode of the values we perform a reduction on. + unsigned ReducedValueOpcode; + /// The width of one full horizontal reduction operation. + unsigned ReduxWidth; + /// Should we model this reduction as a pairwise reduction tree or a tree that + /// splits the vector in halves and adds those halves. + bool IsPairwiseReduction; + +public: + HorizontalReduction() + : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0), + ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} + + /// \brief Try to find a reduction tree. + bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B, + DataLayout *DL) { + assert((!Phi || + std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) && + "Thi phi needs to use the binary operator"); + + // We could have a initial reductions that is not an add. + // r *= v1 + v2 + v3 + v4 + // In such a case start looking for a tree rooted in the first '+'. + if (Phi) { + if (B->getOperand(0) == Phi) { + Phi = 0; + B = dyn_cast(B->getOperand(1)); + } else if (B->getOperand(1) == Phi) { + Phi = 0; + B = dyn_cast(B->getOperand(0)); + } + } + + if (!B) + return false; + + Type *Ty = B->getType(); + if (Ty->isVectorTy()) + return false; + + ReductionOpcode = B->getOpcode(); + ReducedValueOpcode = 0; + ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty); + ReductionRoot = B; + ReductionPHI = Phi; + + if (ReduxWidth < 4) + return false; + + // We currently only support adds. + if (ReductionOpcode != Instruction::Add && + ReductionOpcode != Instruction::FAdd) + return false; + + // Post order traverse the reduction tree starting at B. We only handle true + // trees containing only binary operators. + SmallVector, 32> Stack; + Stack.push_back(std::make_pair(B, 0)); + while (!Stack.empty()) { + BinaryOperator *TreeN = Stack.back().first; + unsigned EdgeToVist = Stack.back().second++; + bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode; + + // Only handle trees in the current basic block. + if (TreeN->getParent() != B->getParent()) + return false; + + // Each tree node needs to have one user except for the ultimate + // reduction. + if (!TreeN->hasOneUse() && TreeN != B) + return false; + + // Postorder vist. + if (EdgeToVist == 2 || IsReducedValue) { + if (IsReducedValue) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. 
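  // E.g. a chain r += a[0]*b[0] + a[1]*b[1] + ... has 'fadd' as the
  // reduction opcode and 'fmul' as the single reduced-value opcode; a tree
  // mixing 'fmul' and 'fsub' leaves fails here. Note that ReduxWidth above
  // is MinVecRegSize / element width: a 128-bit register gives a width of 4
  // for f32/i32 but only 2 for f64/i64, which the `ReduxWidth < 4` test
  // already rejected.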
+ if (!ReducedValueOpcode) + ReducedValueOpcode = TreeN->getOpcode(); + else if (ReducedValueOpcode != TreeN->getOpcode()) + return false; + ReducedVals.push_back(TreeN); + } else { + // We need to be able to reassociate the adds. + if (!TreeN->isAssociative()) + return false; + ReductionOps.insert(TreeN); + } + // Retract. + Stack.pop_back(); + continue; + } + + // Visit left or right. + Value *NextV = TreeN->getOperand(EdgeToVist); + BinaryOperator *Next = dyn_cast(NextV); + if (Next) + Stack.push_back(std::make_pair(Next, 0)); + else if (NextV != Phi) + return false; + } + return true; + } + + /// \brief Attempt to vectorize the tree found by + /// matchAssociativeReduction. + bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { + if (ReducedVals.empty()) + return false; + + unsigned NumReducedVals = ReducedVals.size(); + if (NumReducedVals < ReduxWidth) + return false; + + Value *VectorizedTree = 0; + IRBuilder<> Builder(ReductionRoot); + FastMathFlags Unsafe; + Unsafe.setUnsafeAlgebra(); + Builder.SetFastMathFlags(Unsafe); + unsigned i = 0; + + for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { + ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); + V.buildTree(ValsToReduce, &ReductionOps); + + // Estimate cost. + int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); + if (Cost >= -SLPCostThreshold) + break; + + DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost + << ". (HorRdx)\n"); + + // Vectorize a tree. + Value *VectorizedRoot; + BoUpSLP::ValueList *Scalars; + tie(VectorizedRoot, Scalars) = V.vectorizeTree(); + + // Emit a reduction. + Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder); + if (VectorizedTree) { + Builder.SetCurrentDebugLocation( + cast((*Scalars)[0])->getDebugLoc()); + VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, + ReducedSubTree, "bin.rdx"); + } else + VectorizedTree = ReducedSubTree; + } + + if (VectorizedTree) { + // Finish the reduction. + for (; i < NumReducedVals; ++i) { + Builder.SetCurrentDebugLocation( + cast(ReducedVals[i])->getDebugLoc()); + VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, + ReducedVals[i]); + } + // Update users. + if (ReductionPHI) { + assert(ReductionRoot != NULL && "Need a reduction operation"); + ReductionRoot->setOperand(0, VectorizedTree); + ReductionRoot->setOperand(1, ReductionPHI); + } else + ReductionRoot->replaceAllUsesWith(VectorizedTree); + } + return VectorizedTree != 0; + } + +private: + + /// \brief Calcuate the cost of a reduction. + int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) { + Type *ScalarTy = FirstReducedVal->getType(); + Type *VecTy = VectorType::get(ScalarTy, ReduxWidth); + + int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true); + int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false); + + IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; + int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; + + int ScalarReduxCost = + ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy); + + DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost + << " for reduction that starts with " << *FirstReducedVal + << " (It is a " + << (IsPairwiseReduction ? 
"pairwise" : "splitting") + << " reduction)\n"); + + return VecReduxCost - ScalarReduxCost; + } + + static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L, + Value *R, const Twine &Name = "") { + if (Opcode == Instruction::FAdd) + return Builder.CreateFAdd(L, R, Name); + return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name); + } + + /// \brief Emit a horizontal reduction of the vectorized value. + Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) { + assert(VectorizedValue && "Need to have a vectorized tree node"); + Instruction *ValToReduce = dyn_cast(VectorizedValue); + assert(isPowerOf2_32(ReduxWidth) && + "We only handle power-of-two reductions for now"); + + SmallVector ShuffleMask(ReduxWidth, 0); + Value *TmpVec = ValToReduce; + for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { + if (IsPairwiseReduction) { + Value *LeftMask = + createRdxShuffleMask(ReduxWidth, i, true, true, Builder); + Value *RightMask = + createRdxShuffleMask(ReduxWidth, i, true, false, Builder); + + Value *LeftShuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); + Value *RightShuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), + "rdx.shuf.r"); + TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf, + "bin.rdx"); + } else { + Value *UpperHalf = + createRdxShuffleMask(ReduxWidth, i, false, false, Builder); + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); + TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx"); + } + } + + // The result is in the first element of the vector. + return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); + } +}; + /// \brief Recognize construction of vectors like /// %ra = insertelement <4 x float> undef, float %s0, i32 0 /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 @@ -1981,7 +2312,18 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!BI) continue; - Value *Inst = BI->getOperand(0); + // Try to match and vectorize a horizontal reduction. + HorizontalReduction HorRdx; + if (ShouldVectorizeHor && + HorRdx.matchAssociativeReduction(P, BI, DL) && + HorRdx.tryToReduce(R, TTI)) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; + } + + Value *Inst = BI->getOperand(0); if (Inst == P) Inst = BI->getOperand(1); @@ -1991,10 +2333,28 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { Changed = true; it = BB->begin(); e = BB->end(); + continue; } + continue; } + // Try to vectorize horizontal reductions feeding into a store. + if (StoreInst *SI = dyn_cast(it)) + if (BinaryOperator *BinOp = + dyn_cast(SI->getValueOperand())) { + HorizontalReduction HorRdx; + if (ShouldVectorizeHor && + ((HorRdx.matchAssociativeReduction(0, BinOp, DL) && + HorRdx.tryToReduce(R, TTI)) || + tryToVectorize(BinOp, R))) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; + } + } + // Try to vectorize trees that start at compare instructions. if (CmpInst *CI = dyn_cast(it)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { -- cgit v1.1 From 19a6f1133fbc01d18bd07cdc082e9488d731bcbc Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 20 Sep 2013 22:12:42 +0000 Subject: InstCombine: Remove unused argument. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191112 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 4 ++-- lib/Transforms/InstCombine/InstCombineCompares.cpp | 14 ++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index e29bf1a..ee95106 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -158,8 +158,8 @@ public: ConstantInt *DivRHS); Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); - Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, Value *TheAdd); + Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 29de6f7..2c292ce 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -739,10 +739,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, } /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". -Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, +Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred, - Value *TheAdd) { + ICmpInst::Predicate Pred) { // If we have X+0, exit early (simplifying logic below) and let it get folded // elsewhere. icmp X+0, X -> icmp X, X if (CI->isZero()) { @@ -2977,20 +2976,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Value *X; ConstantInt *Cst; // icmp X+Cst, X if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); + return FoldICmpAddOpCst(I, X, Cst, I.getPredicate()); // icmp X, X+Cst if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) - return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); + return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); } return Changed ? &I : 0; } - - - - - /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. /// Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, -- cgit v1.1 From d93e8a06b2ca09ab18f390cd514b7443e2e571f7 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Fri, 20 Sep 2013 23:12:57 +0000 Subject: Resurrect r191017 " GVN proceeds in the presence of dead code" plus a fix to PR17307 & 17308. The problem of r191017 is that when GVN fabricate a val-number for a dead instruction (in order to make following expr-PRE happy), it forget to fabricate a leader-table entry for it as well. 
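In sketch form, the fix pairs each fabricated value number with a leader-table
entry (names as in the patch below; the iterator boilerplate is condensed into
range-for):

  for (BasicBlock *BB : DeadBlocks)
    for (Instruction &Inst : *BB) {
      unsigned ValNum = VN.lookup_or_add(&Inst);
      addToLeaderTable(ValNum, &Inst, BB); // the call r191017 was missing
    }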
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191118 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 174 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 168 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index bc418af..fefc8ad 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" @@ -507,7 +508,9 @@ namespace { enum ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. - MemIntrin // A memory intrinsic which is loaded from. + MemIntrin, // A memory intrinsic which is loaded from. + UndefVal // A UndefValue representing a value from dead block (which + // is not yet physically removed from the CFG). }; /// V - The value that is live out of the block. @@ -545,10 +548,20 @@ namespace { Res.Offset = Offset; return Res; } - + + static AvailableValueInBlock getUndef(BasicBlock *BB) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(0); + Res.Val.setInt(UndefVal); + Res.Offset = 0; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -576,6 +589,7 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; + SetVector DeadBlocks; ValueTable VN; @@ -698,6 +712,9 @@ namespace { unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool processFoldableCondBr(BranchInst *BI); + void addDeadBlock(BasicBlock *BB); + void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1253,8 +1270,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) + LI->getParent())) { + assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + } // Otherwise, we have to construct SSA form. 
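The UndefVal state introduced above rides in the 2-bit tag of
AvailableValueInBlock's PointerIntPair, which is why getUndef() stores a null
pointer rather than adding a field. A minimal standalone sketch of the same
pattern (illustrative only, not from the patch):

  #include "llvm/ADT/PointerIntPair.h"
  #include "llvm/IR/Value.h"

  enum ValType { SimpleVal, LoadVal, MemIntrin, UndefVal };

  struct Avail {
    // The low 2 bits of the pointer hold the tag; the payload may be null.
    llvm::PointerIntPair<llvm::Value *, 2, ValType> Val;
    bool isUndefValue() const { return Val.getInt() == UndefVal; }
  };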
SmallVector NewPHIs; @@ -1324,7 +1343,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else { + } else if (isMemIntrinValue()) { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1332,6 +1351,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); + } else { + assert(isUndefValue() && "Should be UndefVal"); + DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); + return UndefValue::get(LoadTy); } return Res; } @@ -1355,6 +1378,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (DeadBlocks.count(DepBB)) { + // Dead dependent mem-op disguise as a load evaluating the same value + // as the load in question. + ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); + continue; + } + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -2191,11 +2221,13 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - if (!BI->isConditional() || isa(BI->getCondition())) + if (!BI->isConditional()) return false; - Value *BranchCond = BI->getCondition(); + if (isa(BI->getCondition())) + return processFoldableCondBr(BI); + Value *BranchCond = BI->getCondition(); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. @@ -2312,6 +2344,9 @@ bool GVN::runOnFunction(Function& F) { } if (EnablePRE) { + // Fabricate val-num for dead-code in order to suppress assertion in + // performPRE(). + assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2325,6 +2360,9 @@ bool GVN::runOnFunction(Function& F) { // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); + // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each + // iteration. + DeadBlocks.clear(); return Changed; } @@ -2335,6 +2373,9 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); + if (DeadBlocks.count(BB)) + return false; + bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2628,3 +2669,124 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } + +// BB is declared dead, which implied other blocks become dead as well. This +// function is to add all these blocks to "DeadBlocks". For the dead blocks' +// live successors, update their phi nodes by replacing the operands +// corresponding to dead blocks with UndefVal. +// +void GVN::addDeadBlock(BasicBlock *BB) { + SmallVector NewDead; + SmallSetVector DF; + + NewDead.push_back(BB); + while (!NewDead.empty()) { + BasicBlock *D = NewDead.pop_back_val(); + if (DeadBlocks.count(D)) + continue; + + // All blocks dominated by D are dead. 
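  // (getDescendants returns D itself along with everything it dominates, so
  //  the insert below marks D dead without a separate step.)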
+ SmallVector Dom; + DT->getDescendants(D, Dom); + DeadBlocks.insert(Dom.begin(), Dom.end()); + + // Figure out the dominance-frontier(D). + for (SmallVectorImpl::iterator I = Dom.begin(), + E = Dom.end(); I != E; I++) { + BasicBlock *B = *I; + for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { + BasicBlock *S = *SI; + if (DeadBlocks.count(S)) + continue; + + bool AllPredDead = true; + for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) + if (!DeadBlocks.count(*PI)) { + AllPredDead = false; + break; + } + + if (!AllPredDead) { + // S could be proved dead later on. That is why we don't update phi + // operands at this moment. + DF.insert(S); + } else { + // While S is not dominated by D, it is dead by now. This could take + // place if S already have a dead predecessor before D is declared + // dead. + NewDead.push_back(S); + } + } + } + } + + // For the dead blocks' live successors, update their phi nodes by replacing + // the operands corresponding to dead blocks with UndefVal. + for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); + I != E; I++) { + BasicBlock *B = *I; + if (DeadBlocks.count(B)) + continue; + + for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { + BasicBlock *P = *PI; + if (!DeadBlocks.count(P)) + continue; + for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { + PHINode &Phi = cast(*II); + Phi.setIncomingValue(Phi.getBasicBlockIndex(P), + UndefValue::get(Phi.getType())); + } + } + } +} + +// If the given branch is recognized as a foldable branch (i.e. conditional +// branch with constant condition), it will perform following analyses and +// transformation. +// 1) If the dead out-coming edge is a critical-edge, split it. Let +// R be the target of the dead out-coming edge. +// 1) Identify the set of dead blocks implied by the branch's dead outcoming +// edge. The result of this step will be {X| X is dominated by R} +// 2) Identify those blocks which haves at least one dead prodecessor. The +// result of this step will be dominance-frontier(R). +// 3) Update the PHIs in DF(R) by replacing the operands corresponding to +// dead blocks with "UndefVal" in an hope these PHIs will optimized away. +// +// Return true iff *NEW* dead code are found. +bool GVN::processFoldableCondBr(BranchInst *BI) { + if (!BI || BI->isUnconditional()) + return false; + + ConstantInt *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return false; + + BasicBlock *DeadRoot = Cond->getZExtValue() ? + BI->getSuccessor(1) : BI->getSuccessor(0); + if (DeadBlocks.count(DeadRoot)) + return false; + + if (!DeadRoot->getSinglePredecessor()) + DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); + + addDeadBlock(DeadRoot); + return true; +} + +// performPRE() will trigger assert if it come across an instruciton without +// associated val-num. As it normally has far more live instructions than dead +// instructions, it makes more sense just to "fabricate" a val-number for the +// dead code than checking if instruction involved is dead or not. 
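// For example, `br i1 true, label %taken, label %dead` has a condition whose
// getZExtValue() is 1, so processFoldableCondBr above picks DeadRoot =
// getSuccessor(1) = %dead; `br i1 false` kills getSuccessor(0) instead.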
+void GVN::assignValNumForDeadCode() { + for (SetVector::iterator I = DeadBlocks.begin(), + E = DeadBlocks.end(); I != E; I++) { + BasicBlock *BB = *I; + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); + II != EE; II++) { + Instruction *Inst = &*II; + unsigned ValNum = VN.lookup_or_add(Inst); + addToLeaderTable(ValNum, Inst, BB); + } + } +} -- cgit v1.1 From 74d3482f76d1f8a20cedfc6701e017e7fd337cf9 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 21 Sep 2013 00:06:20 +0000 Subject: Revert "SLPVectorizer: Handle more horizontal reductions (disabled)" This reverts commit r191108. The horizontal.ll test case fails under libgmalloc. Thanks Shuxin for pointing this out to me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 376 +---------------------------- 1 file changed, 8 insertions(+), 368 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index caedd09..cd3f723 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -49,11 +49,6 @@ static cl::opt SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")); - -static cl::opt -ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, - cl::desc("Attempt to vectorize horizontal reductions")); - namespace { static const unsigned MinVecRegSize = 128; @@ -243,21 +238,17 @@ public: } /// \brief Vectorize the tree that starts with the elements in \p VL. - /// Returns the vectorized root and the scalar operations the root was based - /// on. - std::pair vectorizeTree(); + void vectorizeTree(); /// \returns the vectorization cost of the subtree that starts at \p VL. /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots and is possibly - /// used by a reduction of \p RdxOps. - void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); + /// Construct a vectorizable tree that starts at \p Roots. + void buildTree(ArrayRef Roots); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { - RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -410,9 +401,6 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; - /// Reduction operators. - ValueSet *RdxOps; - // Analysis and block reference. Function *F; ScalarEvolution *SE; @@ -425,9 +413,8 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { +void BoUpSLP::buildTree(ArrayRef Roots) { deleteTree(); - RdxOps = Rdx; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -458,12 +445,8 @@ void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); continue; } - Instruction *UserInst = dyn_cast(*User); - if (!UserInst) - continue; - // Ignore uses that are part of the reduction. - if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) + if (!isa(*User)) continue; DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " << @@ -595,10 +578,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } - // This user is part of the reduction. - if (RdxOps && RdxOps->count(User)) - continue; - // Make sure that we can schedule this unknown user. 
BlockNumbering &BN = BlocksNumbers[BB]; int UserIndex = BN.getIndex(User); @@ -1393,7 +1372,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return 0; } -std::pair BoUpSLP::vectorizeTree() { +void BoUpSLP::vectorizeTree() { Builder.SetInsertPoint(F->getEntryBlock().begin()); vectorizeTree(&VectorizableTree[0]); @@ -1470,10 +1449,7 @@ std::pair BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); assert(!MustGather.count(*User) && "Replacing gathered value with undef"); - - assert((ScalarToTreeEntry.count(*User) || - // It is legal to replace the reduction users by undef. - (RdxOps && RdxOps->count(*User))) && + assert(ScalarToTreeEntry.count(*User) && "Replacing out-of-tree value with undef"); } Value *Undef = UndefValue::get(Ty); @@ -1488,9 +1464,6 @@ std::pair BoUpSLP::vectorizeTree() { BlocksNumbers[it].forget(); } Builder.ClearInsertionPoint(); - - return std::make_pair(VectorizableTree[0].VectorizedValue, - &VectorizableTree[0].Scalars); } void BoUpSLP::optimizeGatherSequence() { @@ -1914,310 +1887,6 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { return 0; } -/// \brief Generate a shuffle mask to be used in a reduction tree. -/// -/// \param VecLen The length of the vector to be reduced. -/// \param NumEltsToRdx The number of elements that should be reduced in the -/// vector. -/// \param IsPairwise Whether the reduction is a pairwise or splitting -/// reduction. A pairwise reduction will generate a mask of -/// <0,2,...> or <1,3,..> while a splitting reduction will generate -/// <2,3, undef,undef> for a vector of 4 and NumElts = 2. -/// \param IsLeft True will generate a mask of even elements, odd otherwise. -static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, - bool IsPairwise, bool IsLeft, - IRBuilder<> &Builder) { - assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); - - SmallVector ShuffleMask( - VecLen, UndefValue::get(Builder.getInt32Ty())); - - if (IsPairwise) - // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). - for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); - else - // Move the upper half of the vector to the lower half. - for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); - - return ConstantVector::get(ShuffleMask); -} - - -/// Model horizontal reductions. -/// -/// A horizontal reduction is a tree of reduction operations (currently add and -/// fadd) that has operations that can be put into a vector as its leaf. -/// For example, this tree: -/// -/// mul mul mul mul -/// \ / \ / -/// + + -/// \ / -/// + -/// This tree has "mul" as its reduced values and "+" as its reduction -/// operations. A reduction might be feeding into a store or a binary operation -/// feeding a phi. -/// ... -/// \ / -/// + -/// \ -/// phi += -/// -/// Or: -/// ... -/// \ / -/// + -/// \ -/// *p = -/// -class HorizontalReduction { - SmallPtrSet ReductionOps; - SmallVector ReducedVals; - - BinaryOperator *ReductionRoot; - PHINode *ReductionPHI; - - /// The opcode of the reduction. - unsigned ReductionOpcode; - /// The opcode of the values we perform a reduction on. - unsigned ReducedValueOpcode; - /// The width of one full horizontal reduction operation. - unsigned ReduxWidth; - /// Should we model this reduction as a pairwise reduction tree or a tree that - /// splits the vector in halves and adds those halves. 
- bool IsPairwiseReduction; - -public: - HorizontalReduction() - : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0), - ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} - - /// \brief Try to find a reduction tree. - bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B, - DataLayout *DL) { - assert((!Phi || - std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) && - "Thi phi needs to use the binary operator"); - - // We could have a initial reductions that is not an add. - // r *= v1 + v2 + v3 + v4 - // In such a case start looking for a tree rooted in the first '+'. - if (Phi) { - if (B->getOperand(0) == Phi) { - Phi = 0; - B = dyn_cast(B->getOperand(1)); - } else if (B->getOperand(1) == Phi) { - Phi = 0; - B = dyn_cast(B->getOperand(0)); - } - } - - if (!B) - return false; - - Type *Ty = B->getType(); - if (Ty->isVectorTy()) - return false; - - ReductionOpcode = B->getOpcode(); - ReducedValueOpcode = 0; - ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty); - ReductionRoot = B; - ReductionPHI = Phi; - - if (ReduxWidth < 4) - return false; - - // We currently only support adds. - if (ReductionOpcode != Instruction::Add && - ReductionOpcode != Instruction::FAdd) - return false; - - // Post order traverse the reduction tree starting at B. We only handle true - // trees containing only binary operators. - SmallVector, 32> Stack; - Stack.push_back(std::make_pair(B, 0)); - while (!Stack.empty()) { - BinaryOperator *TreeN = Stack.back().first; - unsigned EdgeToVist = Stack.back().second++; - bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode; - - // Only handle trees in the current basic block. - if (TreeN->getParent() != B->getParent()) - return false; - - // Each tree node needs to have one user except for the ultimate - // reduction. - if (!TreeN->hasOneUse() && TreeN != B) - return false; - - // Postorder vist. - if (EdgeToVist == 2 || IsReducedValue) { - if (IsReducedValue) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. - if (!ReducedValueOpcode) - ReducedValueOpcode = TreeN->getOpcode(); - else if (ReducedValueOpcode != TreeN->getOpcode()) - return false; - ReducedVals.push_back(TreeN); - } else { - // We need to be able to reassociate the adds. - if (!TreeN->isAssociative()) - return false; - ReductionOps.insert(TreeN); - } - // Retract. - Stack.pop_back(); - continue; - } - - // Visit left or right. - Value *NextV = TreeN->getOperand(EdgeToVist); - BinaryOperator *Next = dyn_cast(NextV); - if (Next) - Stack.push_back(std::make_pair(Next, 0)); - else if (NextV != Phi) - return false; - } - return true; - } - - /// \brief Attempt to vectorize the tree found by - /// matchAssociativeReduction. - bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { - if (ReducedVals.empty()) - return false; - - unsigned NumReducedVals = ReducedVals.size(); - if (NumReducedVals < ReduxWidth) - return false; - - Value *VectorizedTree = 0; - IRBuilder<> Builder(ReductionRoot); - FastMathFlags Unsafe; - Unsafe.setUnsafeAlgebra(); - Builder.SetFastMathFlags(Unsafe); - unsigned i = 0; - - for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { - ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, &ReductionOps); - - // Estimate cost. - int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); - if (Cost >= -SLPCostThreshold) - break; - - DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost - << ". (HorRdx)\n"); - - // Vectorize a tree. 
- Value *VectorizedRoot; - BoUpSLP::ValueList *Scalars; - tie(VectorizedRoot, Scalars) = V.vectorizeTree(); - - // Emit a reduction. - Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder); - if (VectorizedTree) { - Builder.SetCurrentDebugLocation( - cast((*Scalars)[0])->getDebugLoc()); - VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, - ReducedSubTree, "bin.rdx"); - } else - VectorizedTree = ReducedSubTree; - } - - if (VectorizedTree) { - // Finish the reduction. - for (; i < NumReducedVals; ++i) { - Builder.SetCurrentDebugLocation( - cast(ReducedVals[i])->getDebugLoc()); - VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, - ReducedVals[i]); - } - // Update users. - if (ReductionPHI) { - assert(ReductionRoot != NULL && "Need a reduction operation"); - ReductionRoot->setOperand(0, VectorizedTree); - ReductionRoot->setOperand(1, ReductionPHI); - } else - ReductionRoot->replaceAllUsesWith(VectorizedTree); - } - return VectorizedTree != 0; - } - -private: - - /// \brief Calcuate the cost of a reduction. - int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) { - Type *ScalarTy = FirstReducedVal->getType(); - Type *VecTy = VectorType::get(ScalarTy, ReduxWidth); - - int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true); - int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false); - - IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; - int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; - - int ScalarReduxCost = - ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy); - - DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost - << " for reduction that starts with " << *FirstReducedVal - << " (It is a " - << (IsPairwiseReduction ? "pairwise" : "splitting") - << " reduction)\n"); - - return VecReduxCost - ScalarReduxCost; - } - - static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L, - Value *R, const Twine &Name = "") { - if (Opcode == Instruction::FAdd) - return Builder.CreateFAdd(L, R, Name); - return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name); - } - - /// \brief Emit a horizontal reduction of the vectorized value. - Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) { - assert(VectorizedValue && "Need to have a vectorized tree node"); - Instruction *ValToReduce = dyn_cast(VectorizedValue); - assert(isPowerOf2_32(ReduxWidth) && - "We only handle power-of-two reductions for now"); - - SmallVector ShuffleMask(ReduxWidth, 0); - Value *TmpVec = ValToReduce; - for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { - if (IsPairwiseReduction) { - Value *LeftMask = - createRdxShuffleMask(ReduxWidth, i, true, true, Builder); - Value *RightMask = - createRdxShuffleMask(ReduxWidth, i, true, false, Builder); - - Value *LeftShuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); - Value *RightShuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), - "rdx.shuf.r"); - TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf, - "bin.rdx"); - } else { - Value *UpperHalf = - createRdxShuffleMask(ReduxWidth, i, false, false, Builder); - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); - TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx"); - } - } - - // The result is in the first element of the vector. 
- return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); - } -}; - /// \brief Recognize construction of vectors like /// %ra = insertelement <4 x float> undef, float %s0, i32 0 /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 @@ -2312,18 +1981,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (!BI) continue; - // Try to match and vectorize a horizontal reduction. - HorizontalReduction HorRdx; - if (ShouldVectorizeHor && - HorRdx.matchAssociativeReduction(P, BI, DL) && - HorRdx.tryToReduce(R, TTI)) { - Changed = true; - it = BB->begin(); - e = BB->end(); - continue; - } - - Value *Inst = BI->getOperand(0); + Value *Inst = BI->getOperand(0); if (Inst == P) Inst = BI->getOperand(1); @@ -2333,28 +1991,10 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { Changed = true; it = BB->begin(); e = BB->end(); - continue; } - continue; } - // Try to vectorize horizontal reductions feeding into a store. - if (StoreInst *SI = dyn_cast(it)) - if (BinaryOperator *BinOp = - dyn_cast(SI->getValueOperand())) { - HorizontalReduction HorRdx; - if (ShouldVectorizeHor && - ((HorRdx.matchAssociativeReduction(0, BinOp, DL) && - HorRdx.tryToReduce(R, TTI)) || - tryToVectorize(BinOp, R))) { - Changed = true; - it = BB->begin(); - e = BB->end(); - continue; - } - } - // Try to vectorize trees that start at compare instructions. if (CmpInst *CI = dyn_cast(it)) { if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) { -- cgit v1.1 From 2ecd8c90b037e58f4914acfc6a4ced5a01774a05 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 21 Sep 2013 00:27:05 +0000 Subject: LoopVectorizer: Only allow vectorization of intrinsics. We can't know for sure that the functions 'abs' or 'round' are the functions from libm. rdar://15012650 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 30908c8..02029e6 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2925,9 +2925,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // We still don't handle functions. However, we can ignore dbg intrinsic // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast(it); - if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI)) { + if (CI) { DEBUG(dbgs() << "LV: Found a call site.\n"); - return false; + + if (!isa(it)) { + DEBUG(dbgs() << "LV: We only vectorize intrinsics.\n"); + return false; + } + + if (!getIntrinsicIDForCall(CI, TLI) && !isa(CI)) { + DEBUG(dbgs() << "LV: Found an unknown intrinsic.\n"); + return false; + } } // Check that the instruction return type is vectorizable. -- cgit v1.1 From a9baf1ecfd343e2d8d8fa277c8b093a1869726bb Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 21 Sep 2013 01:06:00 +0000 Subject: Reapply "SLPVectorizer: Handle more horizontal reductions (disabled)"" Reapply r191108 with a fix for a memory corruption error I introduced. Of course, we can't reference the scalars that we replace by vectorizing and then call their eraseFromParent method. I only 'needed' the scalars to get the DebugLoc. Just store the DebugLoc before actually vectorizing instead. 
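In sketch form (names as in the reapplied patch below):

  DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
  Value *VectorizedRoot = V.vectorizeTree(); // may erase ReducedVals[i]
  Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
  Builder.SetCurrentDebugLocation(Loc);      // safe: the DebugLoc was copied out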
As a nice side effect, this also simplifies the interface between BoUpSLP and the HorizontalReduction class to returning a value pointer (the vectorized tree root). radar://14607682 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 372 ++++++++++++++++++++++++++++- 1 file changed, 364 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index cd3f723..053e08e4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -49,6 +49,11 @@ static cl::opt SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " "number ")); + +static cl::opt +ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, + cl::desc("Attempt to vectorize horizontal reductions")); + namespace { static const unsigned MinVecRegSize = 128; @@ -238,17 +243,20 @@ public: } /// \brief Vectorize the tree that starts with the elements in \p VL. - void vectorizeTree(); + /// Returns the vectorized root. + Value *vectorizeTree(); /// \returns the vectorization cost of the subtree that starts at \p VL. /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots. - void buildTree(ArrayRef Roots); + /// Construct a vectorizable tree that starts at \p Roots and is possibly + /// used by a reduction of \p RdxOps. + void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { + RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -401,6 +409,9 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; + /// Reduction operators. + ValueSet *RdxOps; + // Analysis and block reference. Function *F; ScalarEvolution *SE; @@ -413,8 +424,9 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots) { +void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { deleteTree(); + RdxOps = Rdx; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -445,8 +457,12 @@ void BoUpSLP::buildTree(ArrayRef Roots) { assert(!VectorizableTree[Idx].NeedToGather && "Bad state"); continue; } + Instruction *UserInst = dyn_cast(*User); + if (!UserInst) + continue; - if (!isa(*User)) + // Ignore uses that are part of the reduction. + if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " << @@ -578,6 +594,10 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } + // This user is part of the reduction. + if (RdxOps && RdxOps->count(User)) + continue; + // Make sure that we can schedule this unknown user. 
BlockNumbering &BN = BlocksNumbers[BB]; int UserIndex = BN.getIndex(User); @@ -1372,7 +1392,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { return 0; } -void BoUpSLP::vectorizeTree() { +Value *BoUpSLP::vectorizeTree() { Builder.SetInsertPoint(F->getEntryBlock().begin()); vectorizeTree(&VectorizableTree[0]); @@ -1449,7 +1469,10 @@ void BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); assert(!MustGather.count(*User) && "Replacing gathered value with undef"); - assert(ScalarToTreeEntry.count(*User) && + + assert((ScalarToTreeEntry.count(*User) || + // It is legal to replace the reduction users by undef. + (RdxOps && RdxOps->count(*User))) && "Replacing out-of-tree value with undef"); } Value *Undef = UndefValue::get(Ty); @@ -1464,6 +1487,8 @@ void BoUpSLP::vectorizeTree() { BlocksNumbers[it].forget(); } Builder.ClearInsertionPoint(); + + return VectorizableTree[0].VectorizedValue; } void BoUpSLP::optimizeGatherSequence() { @@ -1887,6 +1912,308 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { return 0; } +/// \brief Generate a shuffle mask to be used in a reduction tree. +/// +/// \param VecLen The length of the vector to be reduced. +/// \param NumEltsToRdx The number of elements that should be reduced in the +/// vector. +/// \param IsPairwise Whether the reduction is a pairwise or splitting +/// reduction. A pairwise reduction will generate a mask of +/// <0,2,...> or <1,3,..> while a splitting reduction will generate +/// <2,3, undef,undef> for a vector of 4 and NumElts = 2. +/// \param IsLeft True will generate a mask of even elements, odd otherwise. +static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, + bool IsPairwise, bool IsLeft, + IRBuilder<> &Builder) { + assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); + + SmallVector ShuffleMask( + VecLen, UndefValue::get(Builder.getInt32Ty())); + + if (IsPairwise) + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). + for (unsigned i = 0; i != NumEltsToRdx; ++i) + ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); + else + // Move the upper half of the vector to the lower half. + for (unsigned i = 0; i != NumEltsToRdx; ++i) + ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); + + return ConstantVector::get(ShuffleMask); +} + + +/// Model horizontal reductions. +/// +/// A horizontal reduction is a tree of reduction operations (currently add and +/// fadd) that has operations that can be put into a vector as its leaf. +/// For example, this tree: +/// +/// mul mul mul mul +/// \ / \ / +/// + + +/// \ / +/// + +/// This tree has "mul" as its reduced values and "+" as its reduction +/// operations. A reduction might be feeding into a store or a binary operation +/// feeding a phi. +/// ... +/// \ / +/// + +/// \ +/// phi += +/// +/// Or: +/// ... +/// \ / +/// + +/// \ +/// *p = +/// +class HorizontalReduction { + SmallPtrSet ReductionOps; + SmallVector ReducedVals; + + BinaryOperator *ReductionRoot; + PHINode *ReductionPHI; + + /// The opcode of the reduction. + unsigned ReductionOpcode; + /// The opcode of the values we perform a reduction on. + unsigned ReducedValueOpcode; + /// The width of one full horizontal reduction operation. + unsigned ReduxWidth; + /// Should we model this reduction as a pairwise reduction tree or a tree that + /// splits the vector in halves and adds those halves. 
+ bool IsPairwiseReduction; + +public: + HorizontalReduction() + : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0), + ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} + + /// \brief Try to find a reduction tree. + bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B, + DataLayout *DL) { + assert((!Phi || + std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) && + "Thi phi needs to use the binary operator"); + + // We could have a initial reductions that is not an add. + // r *= v1 + v2 + v3 + v4 + // In such a case start looking for a tree rooted in the first '+'. + if (Phi) { + if (B->getOperand(0) == Phi) { + Phi = 0; + B = dyn_cast(B->getOperand(1)); + } else if (B->getOperand(1) == Phi) { + Phi = 0; + B = dyn_cast(B->getOperand(0)); + } + } + + if (!B) + return false; + + Type *Ty = B->getType(); + if (Ty->isVectorTy()) + return false; + + ReductionOpcode = B->getOpcode(); + ReducedValueOpcode = 0; + ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty); + ReductionRoot = B; + ReductionPHI = Phi; + + if (ReduxWidth < 4) + return false; + + // We currently only support adds. + if (ReductionOpcode != Instruction::Add && + ReductionOpcode != Instruction::FAdd) + return false; + + // Post order traverse the reduction tree starting at B. We only handle true + // trees containing only binary operators. + SmallVector, 32> Stack; + Stack.push_back(std::make_pair(B, 0)); + while (!Stack.empty()) { + BinaryOperator *TreeN = Stack.back().first; + unsigned EdgeToVist = Stack.back().second++; + bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode; + + // Only handle trees in the current basic block. + if (TreeN->getParent() != B->getParent()) + return false; + + // Each tree node needs to have one user except for the ultimate + // reduction. + if (!TreeN->hasOneUse() && TreeN != B) + return false; + + // Postorder vist. + if (EdgeToVist == 2 || IsReducedValue) { + if (IsReducedValue) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. + if (!ReducedValueOpcode) + ReducedValueOpcode = TreeN->getOpcode(); + else if (ReducedValueOpcode != TreeN->getOpcode()) + return false; + ReducedVals.push_back(TreeN); + } else { + // We need to be able to reassociate the adds. + if (!TreeN->isAssociative()) + return false; + ReductionOps.insert(TreeN); + } + // Retract. + Stack.pop_back(); + continue; + } + + // Visit left or right. + Value *NextV = TreeN->getOperand(EdgeToVist); + BinaryOperator *Next = dyn_cast(NextV); + if (Next) + Stack.push_back(std::make_pair(Next, 0)); + else if (NextV != Phi) + return false; + } + return true; + } + + /// \brief Attempt to vectorize the tree found by + /// matchAssociativeReduction. + bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { + if (ReducedVals.empty()) + return false; + + unsigned NumReducedVals = ReducedVals.size(); + if (NumReducedVals < ReduxWidth) + return false; + + Value *VectorizedTree = 0; + IRBuilder<> Builder(ReductionRoot); + FastMathFlags Unsafe; + Unsafe.setUnsafeAlgebra(); + Builder.SetFastMathFlags(Unsafe); + unsigned i = 0; + + for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { + ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); + V.buildTree(ValsToReduce, &ReductionOps); + + // Estimate cost. + int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); + if (Cost >= -SLPCostThreshold) + break; + + DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost + << ". (HorRdx)\n"); + + // Vectorize a tree. 
+ DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); + Value *VectorizedRoot = V.vectorizeTree(); + + // Emit a reduction. + Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder); + if (VectorizedTree) { + Builder.SetCurrentDebugLocation(Loc); + VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, + ReducedSubTree, "bin.rdx"); + } else + VectorizedTree = ReducedSubTree; + } + + if (VectorizedTree) { + // Finish the reduction. + for (; i < NumReducedVals; ++i) { + Builder.SetCurrentDebugLocation( + cast(ReducedVals[i])->getDebugLoc()); + VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree, + ReducedVals[i]); + } + // Update users. + if (ReductionPHI) { + assert(ReductionRoot != NULL && "Need a reduction operation"); + ReductionRoot->setOperand(0, VectorizedTree); + ReductionRoot->setOperand(1, ReductionPHI); + } else + ReductionRoot->replaceAllUsesWith(VectorizedTree); + } + return VectorizedTree != 0; + } + +private: + + /// \brief Calcuate the cost of a reduction. + int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) { + Type *ScalarTy = FirstReducedVal->getType(); + Type *VecTy = VectorType::get(ScalarTy, ReduxWidth); + + int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true); + int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false); + + IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost; + int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost; + + int ScalarReduxCost = + ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy); + + DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost + << " for reduction that starts with " << *FirstReducedVal + << " (It is a " + << (IsPairwiseReduction ? "pairwise" : "splitting") + << " reduction)\n"); + + return VecReduxCost - ScalarReduxCost; + } + + static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L, + Value *R, const Twine &Name = "") { + if (Opcode == Instruction::FAdd) + return Builder.CreateFAdd(L, R, Name); + return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name); + } + + /// \brief Emit a horizontal reduction of the vectorized value. + Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) { + assert(VectorizedValue && "Need to have a vectorized tree node"); + Instruction *ValToReduce = dyn_cast(VectorizedValue); + assert(isPowerOf2_32(ReduxWidth) && + "We only handle power-of-two reductions for now"); + + SmallVector ShuffleMask(ReduxWidth, 0); + Value *TmpVec = ValToReduce; + for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { + if (IsPairwiseReduction) { + Value *LeftMask = + createRdxShuffleMask(ReduxWidth, i, true, true, Builder); + Value *RightMask = + createRdxShuffleMask(ReduxWidth, i, true, false, Builder); + + Value *LeftShuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); + Value *RightShuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), + "rdx.shuf.r"); + TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf, + "bin.rdx"); + } else { + Value *UpperHalf = + createRdxShuffleMask(ReduxWidth, i, false, false, Builder); + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); + TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx"); + } + } + + // The result is in the first element of the vector. 
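  // Worked splitting trace for ReduxWidth = 4 over lanes <a0, a1, a2, a3>
  // (u = undef):
  //   i = 2: Shuf = <a2, a3, u, u>,   TmpVec = <a0+a2, a1+a3, u, u>
  //   i = 1: Shuf = <a1+a3, u, u, u>, TmpVec = <(a0+a2)+(a1+a3), u, u, u>
  // Lane 0 now holds the full sum and is extracted below.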
+    return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+  }
+};
+
 /// \brief Recognize construction of vectors like
 ///  %ra = insertelement <4 x float> undef, float %s0, i32 0
 ///  %rb = insertelement <4 x float> %ra, float %s1, i32 1
@@ -1981,7 +2308,18 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (!BI)
         continue;
 
-      Value *Inst = BI->getOperand(0);
+      // Try to match and vectorize a horizontal reduction.
+      HorizontalReduction HorRdx;
+      if (ShouldVectorizeHor &&
+          HorRdx.matchAssociativeReduction(P, BI, DL) &&
+          HorRdx.tryToReduce(R, TTI)) {
+        Changed = true;
+        it = BB->begin();
+        e = BB->end();
+        continue;
+      }
+
+      Value *Inst = BI->getOperand(0);
       if (Inst == P)
         Inst = BI->getOperand(1);
 
@@ -1991,10 +2329,28 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
+        continue;
       }
+      continue;
     }
 
+    // Try to vectorize horizontal reductions feeding into a store.
+    if (StoreInst *SI = dyn_cast<StoreInst>(it))
+      if (BinaryOperator *BinOp =
+              dyn_cast<BinaryOperator>(SI->getValueOperand())) {
+        HorizontalReduction HorRdx;
+        if (ShouldVectorizeHor &&
+            ((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+              HorRdx.tryToReduce(R, TTI)) ||
+             tryToVectorize(BinOp, R))) {
+          Changed = true;
+          it = BB->begin();
+          e = BB->end();
+          continue;
+        }
+      }
+
     // Try to vectorize trees that start at compare instructions.
     if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
       if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
--
cgit v1.1


From 38bf2d62b6be4b496e8f6d176578699b9c6e08bb Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Sat, 21 Sep 2013 05:37:30 +0000
Subject: SLPVectorizer: Fix multiline comment warning

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191135 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 053e08e4..b0c4894 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1960,14 +1960,14 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
 ///      ...
 ///   \    /
 ///    +
-///    \
+///    |
 ///   phi +=
 ///
 ///  Or:
 ///      ...
 ///   \    /
 ///    +
-///    \
+///    |
 ///   *p =
 ///
 class HorizontalReduction {
--
cgit v1.1


From 1ce1525ed453aea78d17f28ec3c353d0cde5341f Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sat, 21 Sep 2013 20:36:04 +0000
Subject: SROA: Handle casts involving vectors of pointers and integer scalars.

SROA wants to convert any types of equivalent widths, but it's not possible to
convert vectors of pointers to an integer scalar with a single cast. As a
workaround we add a bitcast to the corresponding int ptr type first. This type
of cast used to be an edge case but has become common with SLP vectorization.
Fixes PR17271.
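As a quick illustration, a sketch of the <2 x i32> to i8* case with IRBuilder,
not from the patch (the helper name is made up; it mirrors what convertValue
now does):

  // A direct bitcast or inttoptr from <2 x i32> to i8* is invalid IR, so go
  // through the pointer-sized integer type first.
  static Value *vecToPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *V,
                         Type *PtrTy) {
    // <2 x i32> --> i64 (bitcast is fine: same total bit width)
    Value *AsInt = IRB.CreateBitCast(V, DL.getIntPtrType(PtrTy));
    // i64 --> i8* (plain scalar inttoptr)
    return IRB.CreateIntToPtr(AsInt, PtrTy);
  }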
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191143 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/SROA.cpp | 58 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 11 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index da441dc..16464d9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1452,6 +1452,10 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
     return false;
 
+  // We handle can convert pointers to integers and vice-versa. Same for vectors
+  // of pointers and integers.
+  OldTy = OldTy->getScalarType();
+  NewTy = NewTy->getScalarType();
   if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
     if (NewTy->isPointerTy() && OldTy->isPointerTy())
       return true;
@@ -1470,21 +1474,53 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
 /// two types for viability with this routine.
 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
-                           Type *Ty) {
-  assert(canConvertValue(DL, V->getType(), Ty) &&
-         "Value not convertable to type");
-  if (V->getType() == Ty)
+                           Type *NewTy) {
+  Type *OldTy = V->getType();
+  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
+
+  if (OldTy == NewTy)
     return V;
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
       if (NewITy->getBitWidth() > OldITy->getBitWidth())
         return IRB.CreateZExt(V, NewITy);
-  if (V->getType()->isIntegerTy() && Ty->isPointerTy())
-    return IRB.CreateIntToPtr(V, Ty);
-  if (V->getType()->isPointerTy() && Ty->isIntegerTy())
-    return IRB.CreatePtrToInt(V, Ty);
-  return IRB.CreateBitCast(V, Ty);
+
+  // See if we need inttoptr for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+  if (OldTy->getScalarType()->isIntegerTy() &&
+      NewTy->getScalarType()->isPointerTy()) {
+    // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
+    if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+                                NewTy);
+
+    // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
+    if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
+                                NewTy);
+
+    return IRB.CreateIntToPtr(V, NewTy);
+  }
+
+  // See if we need ptrtoint for this type pair. A cast involving both scalars
+  // and vectors requires an additional bitcast.
+  if (OldTy->getScalarType()->isPointerTy() &&
+      NewTy->getScalarType()->isIntegerTy()) {
+    // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
+    if (OldTy->isVectorTy() && !NewTy->isVectorTy())
+      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+                               NewTy);
+
+    // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
+    if (!OldTy->isVectorTy() && NewTy->isVectorTy())
+      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
+                               NewTy);
+
+    return IRB.CreatePtrToInt(V, NewTy);
+  }
+
+  return IRB.CreateBitCast(V, NewTy);
 }
 
 /// \brief Test whether the given slice use can be promoted to a vector.
--
cgit v1.1


From 7f80b75b963781d81b772ae2f3a35dc74e1b6457 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sun, 22 Sep 2013 11:24:58 +0000
Subject: Drop spurious handle in comment.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191172 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/SROA.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 16464d9..a0be2c6 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1452,7 +1452,7 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
   if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
     return false;
 
-  // We handle can convert pointers to integers and vice-versa. Same for vectors
+  // We can convert pointers to integers and vice-versa. Same for vectors
   // of pointers and integers.
   OldTy = OldTy->getScalarType();
   NewTy = NewTy->getScalarType();
--
cgit v1.1


From 0d293e45b66c742fdbc3998209bb20ed6c5806bf Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Sun, 22 Sep 2013 14:09:50 +0000
Subject: Provide basic type safety for array_pod_sort comparators.

This makes using array_pod_sort significantly safer. The implementation relies
on function pointer casting but that should be safe as we're dealing with
void* here.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191175 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/GlobalOpt.cpp     | 10 ++--------
 lib/Transforms/Utils/SimplifyCFG.cpp |  7 ++++---
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 7ba7f86..cf15580 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3042,14 +3042,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   return true;
 }
 
-static int compareNames(const void *A, const void *B) {
-  const GlobalValue *VA = *reinterpret_cast<const GlobalValue *const *>(A);
-  const GlobalValue *VB = *reinterpret_cast<const GlobalValue *const *>(B);
-  if (VA->getName() < VB->getName())
-    return -1;
-  if (VB->getName() < VA->getName())
-    return 1;
-  return 0;
+static int compareNames(Constant *const *A, Constant *const *B) {
+  return (*A)->getName().compare((*B)->getName());
 }
 
 static void setUsedInitializer(GlobalVariable &V,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0dea844..0e56904 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -699,9 +699,10 @@ namespace {
   };
 }
 
-static int ConstantIntSortPredicate(const void *P1, const void *P2) {
-  const ConstantInt *LHS = *(const ConstantInt*const*)P1;
-  const ConstantInt *RHS = *(const ConstantInt*const*)P2;
+static int ConstantIntSortPredicate(ConstantInt *const *P1,
+                                    ConstantInt *const *P2) {
+  const ConstantInt *LHS = *P1;
+  const ConstantInt *RHS = *P2;
   if (LHS->getValue().ult(RHS->getValue()))
     return 1;
   if (LHS->getValue() == RHS->getValue())
--
cgit v1.1


From 4e7b015a4a724782bf37284c3c74f2540cf8f3e1 Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Mon, 23 Sep 2013 14:54:39 +0000
Subject: Revert "LoopVectorizer: Only allow vectorization of intrinsics."

Revert 191122 - with extra checks we are allowed to vectorize math library
function calls.

Standard library identifiers are reserved names, so functions with external
linkage must not override them. However, functions with internal linkage can.
Therefore, we can vectorize calls to math library functions with a check for external linkage and matching signature. This matches what we do during SelectionDAG building. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191206 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 75 ++++++++++++++++++------------ 1 file changed, 46 insertions(+), 29 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 02029e6..e3bae02 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1809,6 +1809,31 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { } } +static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, + Intrinsic::ID ValidIntrinsicID) { + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return Intrinsic::not_intrinsic; + + return ValidIntrinsicID; +} + +static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I, + Intrinsic::ID ValidIntrinsicID) { + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + !I.getArgOperand(1)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + I.getType() != I.getArgOperand(1)->getType() || + !I.onlyReadsMemory()) + return Intrinsic::not_intrinsic; + + return ValidIntrinsicID; +} + + static Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { // If we have an intrinsic call, check if it is trivially vectorizable. @@ -1847,8 +1872,9 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { LibFunc::Func Func; Function *F = CI->getCalledFunction(); // We're going to make assumptions on the semantics of the functions, check - // that the target knows that it's available in this environment. - if (!F || !TLI->getLibFunc(F->getName(), Func)) + // that the target knows that it's available in this environment and it does + // not have local linkage. 
+ if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func)) return Intrinsic::not_intrinsic; // Otherwise check if we have a call to a function that can be turned into a @@ -1859,67 +1885,67 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: - return Intrinsic::sin; + return checkUnaryFloatSignature(*CI, Intrinsic::sin); case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: - return Intrinsic::cos; + return checkUnaryFloatSignature(*CI, Intrinsic::cos); case LibFunc::exp: case LibFunc::expf: case LibFunc::expl: - return Intrinsic::exp; + return checkUnaryFloatSignature(*CI, Intrinsic::exp); case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: - return Intrinsic::exp2; + return checkUnaryFloatSignature(*CI, Intrinsic::exp2); case LibFunc::log: case LibFunc::logf: case LibFunc::logl: - return Intrinsic::log; + return checkUnaryFloatSignature(*CI, Intrinsic::log); case LibFunc::log10: case LibFunc::log10f: case LibFunc::log10l: - return Intrinsic::log10; + return checkUnaryFloatSignature(*CI, Intrinsic::log10); case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: - return Intrinsic::log2; + return checkUnaryFloatSignature(*CI, Intrinsic::log2); case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: - return Intrinsic::fabs; + return checkUnaryFloatSignature(*CI, Intrinsic::fabs); case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: - return Intrinsic::copysign; + return checkBinaryFloatSignature(*CI, Intrinsic::copysign); case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: - return Intrinsic::floor; + return checkUnaryFloatSignature(*CI, Intrinsic::floor); case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: - return Intrinsic::ceil; + return checkUnaryFloatSignature(*CI, Intrinsic::ceil); case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: - return Intrinsic::trunc; + return checkUnaryFloatSignature(*CI, Intrinsic::trunc); case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: - return Intrinsic::rint; + return checkUnaryFloatSignature(*CI, Intrinsic::rint); case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: - return Intrinsic::nearbyint; + return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint); case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: - return Intrinsic::round; + return checkUnaryFloatSignature(*CI, Intrinsic::round); case LibFunc::pow: case LibFunc::powf: case LibFunc::powl: - return Intrinsic::pow; + return checkBinaryFloatSignature(*CI, Intrinsic::pow); } return Intrinsic::not_intrinsic; @@ -2925,18 +2951,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // We still don't handle functions. However, we can ignore dbg intrinsic // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast(it); - if (CI) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); - - if (!isa(it)) { - DEBUG(dbgs() << "LV: We only vectorize intrinsics.\n"); - return false; - } - - if (!getIntrinsicIDForCall(CI, TLI) && !isa(CI)) { - DEBUG(dbgs() << "LV: Found an unknown intrinsic.\n"); - return false; - } + return false; } // Check that the instruction return type is vectorizable. -- cgit v1.1 From ecf0fcd2b17ccc71b2a7b5849c1416aeb48a9390 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Tue, 24 Sep 2013 11:20:27 +0000 Subject: [msan] Handling of atomic load/store, atomic rmw, cmpxchg. 
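The new file comment below spells out the approximation; as a quick
illustration (not part of the commit; shadow(x) is hypothetical shorthand for
the compiler-generated shadow of x), the instrumented code conceptually does:

  // Atomic store: store a *clean* shadow first, then the app store,
  // with the ordering strengthened to at least release.
  shadow(x) = 0;
  atomic_store(&x, v, memory_order_release);

  // Atomic load: app load first, then the shadow load,
  // with the ordering strengthened to at least acquire.
  v = atomic_load(&x, memory_order_acquire);
  sv = shadow(x);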
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191287 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 98 +++++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index cab7a7a..eafa2b6 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -66,6 +66,31 @@ /// avoids storing origin to memory when a fully initialized value is stored. /// This way it avoids needless overwritting origin of the 4-byte region on /// a short (i.e. 1 byte) clean store, and it is also good for performance. +/// +/// Atomic handling. +/// +/// Ideally, every atomic store of application value should update the +/// corresponding shadow location in an atomic way. Unfortunately, atomic store +/// of two disjoint locations can not be done without severe slowdown. +/// +/// Therefore, we implement an approximation that may err on the safe side. +/// In this implementation, every atomically accessed location in the program +/// may only change from (partially) uninitialized to fully initialized, but +/// not the other way around. We load the shadow _after_ the application load, +/// and we store the shadow _before_ the app store. Also, we always store clean +/// shadow (if the application store is atomic). This way, if the store-load +/// pair constitutes a happens-before arc, shadow store and load are correctly +/// ordered such that the load will get either the value that was stored, or +/// some later value (which is always clean). +/// +/// This does not work very well with Compare-And-Swap (CAS) and +/// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW +/// must store the new shadow before the app operation, and load the shadow +/// after the app operation. Computers don't work this way. Current +/// implementation ignores the load aspect of CAS/RMW, always returning a clean +/// value. It implements the store part as a simple atomic store by storing a +/// clean shadow. + //===----------------------------------------------------------------------===// #define DEBUG_TYPE "msan" @@ -487,7 +512,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(&I); Value *Val = I.getValueOperand(); Value *Addr = I.getPointerOperand(); - Value *Shadow = getShadow(Val); + Value *Shadow = I.isAtomic() ? 
getCleanShadow(Val) : getShadow(Val); Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = @@ -498,6 +523,9 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ClCheckAccessAddress) insertCheck(Addr, &I); + if (I.isAtomic()) + I.setOrdering(addReleaseOrdering(I.getOrdering())); + if (MS.TrackOrigins) { unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); if (ClStoreCleanOrigin || isa(Shadow->getType())) { @@ -876,6 +904,38 @@ struct MemorySanitizerVisitor : public InstVisitor { ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); } + AtomicOrdering addReleaseOrdering(AtomicOrdering a) { + switch (a) { + case NotAtomic: + return NotAtomic; + case Unordered: + case Monotonic: + case Release: + return Release; + case Acquire: + case AcquireRelease: + return AcquireRelease; + case SequentiallyConsistent: + return SequentiallyConsistent; + } + } + + AtomicOrdering addAcquireOrdering(AtomicOrdering a) { + switch (a) { + case NotAtomic: + return NotAtomic; + case Unordered: + case Monotonic: + case Acquire: + return Acquire; + case Release: + case AcquireRelease: + return AcquireRelease; + case SequentiallyConsistent: + return SequentiallyConsistent; + } + } + // ------------------- Visitors. /// \brief Instrument LoadInst @@ -884,7 +944,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// Optionally, checks that the load address is fully defined. void visitLoadInst(LoadInst &I) { assert(I.getType()->isSized() && "Load type must have size"); - IRBuilder<> IRB(&I); + IRBuilder<> IRB(I.getNextNode()); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); if (LoadShadow) { @@ -898,6 +958,9 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ClCheckAccessAddress) insertCheck(I.getPointerOperand(), &I); + if (I.isAtomic()) + I.setOrdering(addAcquireOrdering(I.getOrdering())); + if (MS.TrackOrigins) { if (LoadShadow) { unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); @@ -917,6 +980,37 @@ struct MemorySanitizerVisitor : public InstVisitor { StoreList.push_back(&I); } + void handleCASOrRMW(Instruction &I) { + assert(isa(I) || isa(I)); + + IRBuilder<> IRB(&I); + Value *Addr = I.getOperand(0); + Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB); + + if (ClCheckAccessAddress) + insertCheck(Addr, &I); + + // Only test the conditional argument of cmpxchg instruction. + // The other argument can potentially be uninitialized, but we can not + // detect this situation reliably without possible false positives. + if (isa(I)) + insertCheck(I.getOperand(1), &I); + + IRB.CreateStore(getCleanShadow(&I), ShadowPtr); + + setShadow(&I, getCleanShadow(&I)); + } + + void visitAtomicRMWInst(AtomicRMWInst &I) { + handleCASOrRMW(I); + I.setOrdering(addReleaseOrdering(I.getOrdering())); + } + + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + handleCASOrRMW(I); + I.setOrdering(addReleaseOrdering(I.getOrdering())); + } + // Vector manipulation. void visitExtractElementInst(ExtractElementInst &I) { insertCheck(I.getOperand(1), &I); -- cgit v1.1 From d721520e4c5d8ad71310a34727a567c3d74e7c08 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 24 Sep 2013 16:37:40 +0000 Subject: Push analysis passes to InstSimplify when they're around anyways. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191309 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombinePHI.cpp | 2 +- lib/Transforms/Scalar/CodeGenPrepare.cpp | 3 ++- lib/Transforms/Utils/Local.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index e99eaf3..4c6d0c4 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -790,7 +790,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, TD)) + if (Value *V = SimplifyInstruction(&PN, TD, TLI)) return ReplaceInstUsesWith(PN, V); // If all PHI operands are the same operation, pull them through the PHI, diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9c5633b..9b56a76 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -1893,7 +1893,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P)) { + if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0, + TLInfo, DT)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8f7314d..56a2d92 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -413,7 +413,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, Instruction *Inst = BI++; WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TD)) { + if (recursivelySimplifyInstruction(Inst, TD, TLI)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); -- cgit v1.1 From 085e23841e9c4f4682385fce456704a5f75f9cdc Mon Sep 17 00:00:00 2001 From: Yi Jiang Date: Tue, 24 Sep 2013 17:26:43 +0000 Subject: set the cost of tiny trees to INT_MAX in SLP vectorizer to disable vectorization on them git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191314 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b0c4894..b8e57de 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -947,7 +947,7 @@ int BoUpSLP::getTreeCost() { if (!VectorizableTree.size()) { assert(!ExternalUses.size() && "We should not have any external users"); } - return 0; + return INT_MAX; } unsigned BundleWidth = VectorizableTree[0].Scalars.size(); -- cgit v1.1 From 63799f6febc91ebec0d308737bfd1c659e4c24b7 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 25 Sep 2013 08:56:00 +0000 Subject: [msan] Fix -Wreturn-type warnings in non-self-hosted build. 
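The fix is the usual LLVM idiom for a switch that covers every enumerator:
compilers that cannot see that the switch is exhaustive still warn that
control falls off the end of the function, so an llvm_unreachable() is placed
after the switch. A generic sketch, not from the patch (the enum and function
are made up):

  #include "llvm/Support/ErrorHandling.h"

  enum Color { Red, Green, Blue };

  static const char *name(Color C) {
    switch (C) {
    case Red:   return "red";
    case Green: return "green";
    case Blue:  return "blue";
    }
    llvm_unreachable("Unknown color");  // silences -Wreturn-type
  }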
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index eafa2b6..65db206 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -918,6 +918,7 @@ struct MemorySanitizerVisitor : public InstVisitor { case SequentiallyConsistent: return SequentiallyConsistent; } + llvm_unreachable("Unknown ordering"); } AtomicOrdering addAcquireOrdering(AtomicOrdering a) { @@ -934,6 +935,7 @@ struct MemorySanitizerVisitor : public InstVisitor { case SequentiallyConsistent: return SequentiallyConsistent; } + llvm_unreachable("Unknown ordering"); } // ------------------- Visitors. -- cgit v1.1 From 9660ebb398cc3e9202a24087ad46290c3de29c7c Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 25 Sep 2013 14:02:32 +0000 Subject: SLPVectorize: Put horizontal reductions feeding a store under separate flag Put them under a separate flag for experimentation. They are more likely to interfere with loop vectorization which happens later in the pass pipeline. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 31 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b8e57de..acb1560 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -54,6 +54,11 @@ static cl::opt ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden, cl::desc("Attempt to vectorize horizontal reductions")); +static cl::opt ShouldStartVectorizeHorAtStore( + "slp-vectorize-hor-store", cl::init(false), cl::Hidden, + cl::desc( + "Attempt to vectorize horizontal reductions feeding into a store")); + namespace { static const unsigned MinVecRegSize = 128; @@ -2336,20 +2341,20 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize horizontal reductions feeding into a store. - if (StoreInst *SI = dyn_cast(it)) - if (BinaryOperator *BinOp = - dyn_cast(SI->getValueOperand())) { - HorizontalReduction HorRdx; - if (ShouldVectorizeHor && - ((HorRdx.matchAssociativeReduction(0, BinOp, DL) && - HorRdx.tryToReduce(R, TTI)) || - tryToVectorize(BinOp, R))) { - Changed = true; - it = BB->begin(); - e = BB->end(); - continue; + if (ShouldStartVectorizeHorAtStore) + if (StoreInst *SI = dyn_cast(it)) + if (BinaryOperator *BinOp = + dyn_cast(SI->getValueOperand())) { + HorizontalReduction HorRdx; + if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) && + HorRdx.tryToReduce(R, TTI)) || + tryToVectorize(BinOp, R))) { + Changed = true; + it = BB->begin(); + e = BB->end(); + continue; + } } - } // Try to vectorize trees that start at compare instructions. if (CmpInst *CI = dyn_cast(it)) { -- cgit v1.1 From 6b4fa2256c1a7e379443447c19c0ade6790a25de Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Fri, 27 Sep 2013 07:36:10 +0000 Subject: First check in. Modified a comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Hello/Hello.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 9f2343b..9251783 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -52,7 +52,7 @@ namespace { return false; } - // We don't modify the program, so we preserve all analyses + // We don't modify the program, so we preserve all analyses. virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } -- cgit v1.1 From d237e834a816399b7e1561dd4db2c501f5095712 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 27 Sep 2013 15:30:25 +0000 Subject: Transforms: Use getFirstNonPHI to set the insertion point for PHIs We were previously using getFirstInsertionPt to insert PHI instructions when vectorizing, but getFirstInsertionPt also skips past landingpads, causing this to generate invalid IR. We can avoid this issue by using getFirstNonPHI instead. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e3bae02..4833aec 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1552,7 +1552,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Use this IR builder to create the loop instructions (Phi, Br, Cmp) // inside the loop. - Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); + Builder.SetInsertPoint(VecBody->getFirstNonPHI()); // Generate the induction variable. setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction)); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index acb1560..7649fff 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1201,7 +1201,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { switch (Opcode) { case Instruction::PHI: { PHINode *PH = dyn_cast(VL0); - Builder.SetInsertPoint(PH->getParent()->getFirstInsertionPt()); + Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues()); E->VectorizedValue = NewPhi; -- cgit v1.1 From 5053537a301ee268d3e49f8cf0426f0c536be4e3 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Fri, 27 Sep 2013 20:35:39 +0000 Subject: InstCombine: Only foldSelectICmpAndOr for integer types Currently foldSelectICmpAndOr asserts if the "or" involves a vector containing several of the same power of two. We can easily avoid this by only performing the fold on integer types, like foldSelectICmpAnd does. 
Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191552 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineSelect.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 7581dbe..283bec2 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -367,7 +367,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); - if (!IC || !IC->isEquality()) + if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return 0; Value *CmpLHS = IC->getOperand(0); -- cgit v1.1 From 9e93ba225f9f4d0cfab175b73d2a744d01104451 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Sep 2013 21:24:57 +0000 Subject: Fix SLPVectorizer using wrong address space for load/store git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191564 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7649fff..ae202c2 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1365,8 +1365,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); LoadInst *LI = cast(VL0); - Value *VecPtr = - Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo()); + unsigned AS = LI->getPointerAddressSpace(); + + Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), + VecTy->getPointerTo(AS)); unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); @@ -1376,6 +1378,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Store: { StoreInst *SI = cast(VL0); unsigned Alignment = SI->getAlignment(); + unsigned AS = SI->getPointerAddressSpace(); ValueList ValueOp; for (int i = 0, e = E->Scalars.size(); i < e; ++i) @@ -1384,8 +1387,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); Value *VecValue = vectorizeTree(ValueOp); - Value *VecPtr = - Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); + Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), + VecTy->getPointerTo(AS)); StoreInst *S = Builder.CreateStore(VecValue, VecPtr); S->setAlignment(Alignment); E->VectorizedValue = S; -- cgit v1.1 From b060a46b0de75d6ab711b60d123ab42a9a6d7b3c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Sep 2013 22:18:51 +0000 Subject: Use type helper functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191574 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index beb63e8..0cd7b14 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1057,7 +1057,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (ParamPTy == 0 || 
!ParamPTy->getElementType()->isSized() || TD == 0) return false; - Type *CurElTy = cast(ActTy)->getElementType(); + Type *CurElTy = ActTy->getPointerElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) return false; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2c292ce..402f8c3 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1782,8 +1782,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == - cast(DestTy)->getBitWidth()) { + TD->getPointerSizeInBits() == DestTy->getIntegerBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); -- cgit v1.1 From b99f6e14af3752c356b6acc887e1a3dcd961e19f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 27 Sep 2013 22:26:25 +0000 Subject: Use right pointer type in DebugIR git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/DebugIR.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp index 9489bb2..f50a044 100644 --- a/lib/Transforms/Instrumentation/DebugIR.cpp +++ b/lib/Transforms/Instrumentation/DebugIR.cpp @@ -402,7 +402,7 @@ private: Type *PointeeTy = T->getPointerElementType(); if (!(N = getType(PointeeTy))) N = Builder.createPointerType( - getOrCreateType(PointeeTy), Layout.getPointerSizeInBits(), + getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T), Layout.getPrefTypeAlignment(T), getTypeName(T)); } else if (T->isArrayTy()) { SmallVector Subrange; -- cgit v1.1 From f80a63fa23862e578de919f4b44d4fcdee68fd0d Mon Sep 17 00:00:00 2001 From: Robert Wilhelm Date: Sat, 28 Sep 2013 11:46:15 +0000 Subject: Fix spelling intruction -> instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191610 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 2 +- lib/Transforms/Vectorize/BBVectorize.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 4aee8ff..e017f50 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -29,7 +29,7 @@ using namespace llvm; -STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled"); +STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 83ee1a4..456dbc1 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -533,7 +533,7 @@ namespace { default: break; case Instruction::GetElementPtr: // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the intruction addressing mode. At the moment we don't + // lowered to the instruction addressing mode. 
At the moment we don't // generate vector GEPs. return 0; case Instruction::Br: -- cgit v1.1 From 3f4f420ab7acb10221ba971543a7eed5489fb626 Mon Sep 17 00:00:00 2001 From: Robert Wilhelm Date: Sat, 28 Sep 2013 13:42:22 +0000 Subject: Even more spelling fixes for "instruction". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191611 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 32af415..952b76b 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -314,7 +314,7 @@ bool NclPopcountRecognize::preliminaryScreen() { if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) return false; - // Counting population are usually conducted by few arithmetic instrutions. + // Counting population are usually conducted by few arithmetic instructions. // Such instructions can be easilly "absorbed" by vacant slots in a // non-compact loop. Therefore, recognizing popcount idiom only makes sense // in a compact loop. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 4833aec..e8c245e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1357,7 +1357,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); - // Replace the operands of the cloned instrucions with extracted scalars. + // Replace the operands of the cloned instructions with extracted scalars. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { Value *Op = Params[op][Part]; // Param is a vector. Need to extract the right lane. @@ -4901,7 +4901,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) { Instruction *Cloned = Instr->clone(); if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); - // Replace the operands of the cloned instrucions with extracted scalars. + // Replace the operands of the cloned instructions with extracted scalars. for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) { Value *Op = Params[op][Part]; Cloned->setOperand(op, Op); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ae202c2..2b0bdfa 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -318,7 +318,7 @@ private: /// \returns the pointer to the barrier instruction if we can't sink. Value *getSinkBarrier(Instruction *Src, Instruction *Dst); - /// \returns the index of the last instrucion in the BB from \p VL. + /// \returns the index of the last instruction in the BB from \p VL. int getLastIndex(ArrayRef VL); /// \returns the Instruction in the bundle \p VL. -- cgit v1.1 From 6ef4dd8cb6852fd0036244a07dc944dc8fb8933c Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Mon, 30 Sep 2013 14:18:35 +0000 Subject: Fix a bug in InstCombine where it attempted to cast a Value* to an Instruction* when it was actually a Constant*. There are quite a few other casts to Instruction that might have the same problem, but this is the only one I have a test case for. 
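The underlying pitfall: IRBuilder's Create* calls may constant fold their
operands, in which case they return a Constant rather than an Instruction. A
minimal sketch, not from the commit (names are illustrative):

  Value *T = Builder->CreateFMul(L, R);  // may fold to a Constant
  // Wrong: cast<Instruction>(T) asserts when T is a Constant.
  // Right: only touch T when it really is an instruction.
  if (Instruction *TI = dyn_cast<Instruction>(T))
    TI->setDebugLoc(Loc);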
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9c310f0..5c5ee12 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -519,10 +519,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Opnd0->hasOneUse()) { // -X * Y => -(X*Y) (Promote negation as high as possible) Value *T = Builder->CreateFMul(N0, Opnd1); - cast(T)->setDebugLoc(I.getDebugLoc()); Instruction *Neg = BinaryOperator::CreateFNeg(T); if (I.getFastMathFlags().any()) { - cast(T)->copyFastMathFlags(&I); + if (Instruction *TI = dyn_cast(T)) + TI->copyFastMathFlags(&I); Neg->copyFastMathFlags(&I); } return Neg; -- cgit v1.1 From adb412daa41aef94a9f724dfd1ade9f579bb3a84 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 30 Sep 2013 15:39:48 +0000 Subject: IRBuilder: Add RAII objects to reset insertion points or fast math flags. Inspired by the object from the SLPVectorizer. This found a minor bug in the debug loc restoration in the vectorizer where the location of a following instruction was attached instead of the location from the original instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b0bdfa..c8c8ba5 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -65,26 +65,6 @@ static const unsigned MinVecRegSize = 128; static const unsigned RecursionMaxDepth = 12; -/// RAII pattern to save the insertion point of the IR builder. -class BuilderLocGuard { -public: - BuilderLocGuard(IRBuilder<> &B) : Builder(B), Loc(B.GetInsertPoint()), - DbgLoc(B.getCurrentDebugLocation()) {} - ~BuilderLocGuard() { - Builder.SetCurrentDebugLocation(DbgLoc); - if (Loc) - Builder.SetInsertPoint(Loc); - } - -private: - // Prevent copying. - BuilderLocGuard(const BuilderLocGuard &); - BuilderLocGuard &operator=(const BuilderLocGuard &); - IRBuilder<> &Builder; - AssertingVH Loc; - DebugLoc DbgLoc; -}; - /// A helper class for numbering instructions in multiple blocks. /// Numbers start at zero for each basic block. struct BlockNumbering { @@ -1177,7 +1157,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { } Value *BoUpSLP::vectorizeTree(TreeEntry *E) { - BuilderLocGuard Guard(Builder); + IRBuilder<>::InsertPointGuard Guard(Builder); if (E->VectorizedValue) { DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); -- cgit v1.1 From 6dc5c6b8792dd599257eb78c5891ede95bbc6085 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 30 Sep 2013 15:39:59 +0000 Subject: InstCombine: Replace manual fast math flag copying with the new IRBuilder RAII helper. Defines away the issue where cast would fail because constant folding happened. Also slightly cleaner. 
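For reference, the RAII helpers used here (added in r191673) follow the usual
guard shape. A sketch, not from the patch (the surrounding values are
hypothetical):

  {
    IRBuilder<>::InsertPointGuard IPG(Builder);   // saves insert point + debug loc
    Builder.SetInsertPoint(OtherBB->getTerminator());
    Builder.CreateBr(Dest);                       // emitted in the other block
  }                                               // state restored here

  {
    IRBuilder<>::FastMathFlagGuard FMFG(Builder); // saves fast-math flags
    FastMathFlags FMF;
    FMF.setUnsafeAlgebra();
    Builder.SetFastMathFlags(FMF);
    Value *P = Builder.CreateFMul(A, B);          // gets the fast flags
  }                                               // previous flags restored here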
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191674 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineMulDivRem.cpp | 42 +++++++++++----------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 5c5ee12..a759548 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -463,10 +463,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Swap && FAddSub->getOpcode() == Instruction::FSub) std::swap(M0, M1); - Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ? - BinaryOperator::CreateFAdd(M0, M1) : - BinaryOperator::CreateFSub(M0, M1); - Instruction *RI = cast(R); + Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd) + ? BinaryOperator::CreateFAdd(M0, M1) + : BinaryOperator::CreateFSub(M0, M1); RI->copyFastMathFlags(&I); return RI; } @@ -493,13 +492,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // if pattern detected emit alternate sequence if (OpX && OpY) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); Value *FMulVal = Builder->CreateFMul(OpX, Log2); - Instruction *FMul = cast(FMulVal); - FMul->copyFastMathFlags(Log2); - Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX); - FSub->copyFastMathFlags(Log2); - return FSub; + Value *FSub = Builder->CreateFSub(FMulVal, OpX); + FSub->takeName(&I); + return ReplaceInstUsesWith(I, FSub); } } @@ -509,6 +508,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { for (int i = 0; i < 2; i++) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); @@ -519,13 +521,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Opnd0->hasOneUse()) { // -X * Y => -(X*Y) (Promote negation as high as possible) Value *T = Builder->CreateFMul(N0, Opnd1); - Instruction *Neg = BinaryOperator::CreateFNeg(T); - if (I.getFastMathFlags().any()) { - if (Instruction *TI = dyn_cast(T)) - TI->copyFastMathFlags(&I); - Neg->copyFastMathFlags(&I); - } - return Neg; + Value *Neg = Builder->CreateFNeg(T); + Neg->takeName(&I); + return ReplaceInstUsesWith(I, Neg); } } @@ -548,13 +546,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Y = Opnd0_0; if (Y) { - Instruction *T = cast(Builder->CreateFMul(Opnd1, Opnd1)); - T->copyFastMathFlags(&I); - T->setDebugLoc(I.getDebugLoc()); + BuilderTy::FastMathFlagGuard Guard(*Builder); + Builder->SetFastMathFlags(I.getFastMathFlags()); + Value *T = Builder->CreateFMul(Opnd1, Opnd1); - Instruction *R = BinaryOperator::CreateFMul(T, Y); - R->copyFastMathFlags(&I); - return R; + Value *R = Builder->CreateFMul(T, Y); + R->takeName(&I); + return ReplaceInstUsesWith(I, R); } } } -- cgit v1.1 From d4278821665aa97f5fc0d19a32ff1fb39a22d395 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 30 Sep 2013 15:40:17 +0000 Subject: Convert manual insert point restores to the new RAII object. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/BoundsChecking.cpp | 4 +--- lib/Transforms/Utils/FlattenCFG.cpp | 4 +--- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 +------- 3 files changed, 3 insertions(+), 13 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index b094d42..031577a 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -80,9 +80,8 @@ BasicBlock *BoundsChecking::getTrapBB() { return TrapBB; Function *Fn = Inst->getParent()->getParent(); - BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint(); + IRBuilder<>::InsertPointGuard Guard(*Builder); TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); - Builder->SetInsertPoint(TrapBB); llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); CallInst *TrapCall = Builder->CreateCall(F); @@ -91,7 +90,6 @@ BasicBlock *BoundsChecking::getTrapBB() { TrapCall->setDebugLoc(Inst->getDebugLoc()); Builder->CreateUnreachable(); - Builder->SetInsertPoint(PrevInsertPoint); return TrapBB; } diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 0beb6fd..1da226b 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -266,8 +266,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, BasicBlock *CB; BranchInst *PBI = dyn_cast(FirstCondBlock->getTerminator()); bool Iteration = true; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + IRBuilder<>::InsertPointGuard Guard(Builder); Value *PC = PBI->getCondition(); do { @@ -298,7 +297,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, new UnreachableInst(CB->getContext(), CB); } while (Iteration); - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); return true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e8c245e..0b5d0d4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1027,25 +1027,19 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE, } Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) { - // Save the current insertion location. - Instruction *Loc = Builder.GetInsertPoint(); - // We need to place the broadcast of invariant variables outside the loop. Instruction *Instr = dyn_cast(V); bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody); bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr; // Place the code for broadcasting invariant variables in the new preheader. + IRBuilder<>::InsertPointGuard Guard(Builder); if (Invariant) Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); // Broadcast the scalar into all locations in the vector. Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); - // Restore the builder insertion point. - if (Invariant) - Builder.SetInsertPoint(Loc); - return Shuf; } -- cgit v1.1 From b313a93be77c88ddac3eee553bdf9199c26bfd74 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 30 Sep 2013 15:52:50 +0000 Subject: BoundsChecking: Fix refacto. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191676 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/BoundsChecking.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 031577a..4a9e950 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -82,6 +82,7 @@ BasicBlock *BoundsChecking::getTrapBB() { Function *Fn = Inst->getParent()->getParent(); IRBuilder<>::InsertPointGuard Guard(*Builder); TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); + Builder->SetInsertPoint(TrapBB); llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); CallInst *TrapCall = Builder->CreateCall(F); -- cgit v1.1 From f9dd19f49833e083f8d32ea015d6d5b57be5e4f4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 30 Sep 2013 21:06:18 +0000 Subject: Constant fold ptrtoint + compare with address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 402f8c3..f691350 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1782,7 +1782,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && - TD->getPointerSizeInBits() == DestTy->getIntegerBitWidth()) { + TD->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); -- cgit v1.1 From 3ca8f2e5d5d8cb0551b42ef89deaaadd9c4cb067 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 30 Sep 2013 21:11:01 +0000 Subject: Use right address space size in InstCombineCompares The test's output doesn't change, but this ensures this is actually hit with a different address space. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index f691350..e624572 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -394,9 +394,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If the index is larger than the pointer size of the target, truncate the // index down like the GEP would do implicitly. We don't have to do this for // an inbounds GEP because the index can't be out of range. 
- if (!GEP->isInBounds() && - Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits()) - Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext())); + if (!GEP->isInBounds()) { + Type *IntPtrTy = TD->getIntPtrType(GEP->getType()); + unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) + Idx = Builder->CreateTrunc(Idx, IntPtrTy); + } // If the comparison is only true for one or two elements, emit direct // comparisons. -- cgit v1.1 From 611082966190251fed33f3d0cf1b4c0d20ad777b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 1 Oct 2013 00:01:14 +0000 Subject: Fix code duplication git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191716 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/BBVectorize.cpp | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 456dbc1..9a6a0e6 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -2344,6 +2344,12 @@ namespace { return ExpandedIEChain; } + static unsigned getNumScalarElements(Type *Ty) { + if (VectorType *VecTy = dyn_cast(Ty)) + return VecTy->getNumElements(); + return 1; + } + // Returns the value to be used as the specified operand of the vector // instruction that fuses I with J. Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, @@ -2359,17 +2365,8 @@ namespace { Instruction *L = I, *H = J; Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; - unsigned numElemL; - if (ArgTypeL->isVectorTy()) - numElemL = cast(ArgTypeL)->getNumElements(); - else - numElemL = 1; - - unsigned numElemH; - if (ArgTypeH->isVectorTy()) - numElemH = cast(ArgTypeH)->getNumElements(); - else - numElemH = 1; + unsigned numElemL = getNumScalarElements(ArgTypeL); + unsigned numElemH = getNumScalarElements(ArgTypeH); Value *LOp = L->getOperand(o); Value *HOp = H->getOperand(o); @@ -2750,16 +2747,8 @@ namespace { VectorType *VType = getVecTypeForPair(IType, JType); unsigned numElem = VType->getNumElements(); - unsigned numElemI, numElemJ; - if (IType->isVectorTy()) - numElemI = cast(IType)->getNumElements(); - else - numElemI = 1; - - if (JType->isVectorTy()) - numElemJ = cast(JType)->getNumElements(); - else - numElemJ = 1; + unsigned numElemI = getNumScalarElements(IType); + unsigned numElemJ = getNumScalarElements(JType); if (IType->isVectorTy()) { std::vector Mask1(numElemI), Mask2(numElemI); -- cgit v1.1 From 8819c84aed10777ba91d4e862229882b8da0b272 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 1 Oct 2013 13:32:03 +0000 Subject: Remove several unused variables. Patch by Alp Toker. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191757 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3 +-- lib/Transforms/Utils/BasicBlockUtils.cpp | 1 - lib/Transforms/Utils/LowerInvoke.cpp | 1 - lib/Transforms/Vectorize/LoopVectorize.cpp | 1 - lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 - 5 files changed, 1 insertion(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8f8af20..7ced56b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -927,8 +927,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n), DynamicInit; - + SmallVector<Constant *, 16> Initializers(n); Function *CtorFunc = M.getFunction(kAsanModuleCtorName); assert(CtorFunc); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index e17a416..a2e82f3 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -248,7 +248,6 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the edge isn't critical, then BB has a single successor or Succ has a // single pred. Split the block. - BasicBlock::iterator SplitPoint; if (BasicBlock *SP = Succ->getSinglePredecessor()) { // If the successor only has a single pred, split the top of the successor // block. diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index f66b54d..9799a30 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -346,7 +346,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) { // Scan all of the uses and see if the live range is live across an unwind // edge. If we find a use live across an invoke edge, create an alloca // and spill the value. - std::set<InvokeInst*> InvokesWithStoreInserted; // Find all of the blocks that this value is live in. std::set<BasicBlock*> LiveBBs; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b5d0d4..d5df1115 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3758,7 +3758,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } - SmallPtrSet<Value*, 16> ReadOnlyPtr; for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { LoadInst *LD = cast<LoadInst>(*I); Value* Ptr = LD->getPointerOperand(); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c8c8ba5..c2c53c7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2172,7 +2172,6 @@ private: assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); - SmallVector<Constant*, 32> ShuffleMask(ReduxWidth, 0); Value *TmpVec = ValToReduce; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { if (IsPairwiseReduction) { -- cgit v1.1 From 187c774a7650e4bbbaac175cb6509f709edba593 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 1 Oct 2013 18:05:30 +0000 Subject: Don't merge tiny functions.
It's silly to merge functions like these: define void @foo(i32 %x) { ret void } define void @bar(i32 %x) { ret void } to get define void @bar(i32) { tail call void @foo(i32 %0) ret void } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191786 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 0f09b90..7e4c1668 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -836,6 +836,18 @@ bool MergeFunctions::insert(ComparableFunction &NewF) { const ComparableFunction &OldF = *Result.first; + // Don't merge tiny functions, since it can just end up making the function + // larger. + // FIXME: Should still merge them if they are unnamed_addr and produce an + // alias. + if (NewF.getFunc()->size() == 1) { + if (NewF.getFunc()->front().size() <= 2) { + DEBUG(dbgs() << NewF.getFunc()->getName() + << " is too small to bother merging\n"); + return false; + } + } + // Never thunk a strong function to a weak function. assert(!OldF.getFunc()->mayBeOverridden() || NewF.getFunc()->mayBeOverridden()); -- cgit v1.1 From 6ffce6fa9295699078f81fabb9ca32f6f22bdf09 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 2 Oct 2013 15:31:34 +0000 Subject: Remove "localize global" optimization Summary: As discussed in http://llvm-reviews.chandlerc.com/D1754, this optimization isn't really valid for C, and fires too rarely anyway. Reviewers: rafael, nicholas Reviewed By: nicholas CC: rnk, llvm-commits, nicholas Differential Revision: http://llvm-reviews.chandlerc.com/D1769 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191834 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 59 ++-------------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cf15580..3129559 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -50,7 +50,6 @@ STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); STATISTIC(NumDeleted , "Number of globals deleted"); STATISTIC(NumFnDeleted , "Number of functions deleted"); STATISTIC(NumGlobUses , "Number of global uses devirtualized"); -STATISTIC(NumLocalized , "Number of globals localized"); STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); @@ -137,24 +136,12 @@ struct GlobalStatus { /// ever stored to this global, keep track of what value it is. Value *StoredOnceValue; - /// AccessingFunction/HasMultipleAccessingFunctions - These start out - /// null/false. When the first accessing function is noticed, it is recorded. - /// When a second different accessing function is noticed, - /// HasMultipleAccessingFunctions is set to true. - const Function *AccessingFunction; - bool HasMultipleAccessingFunctions; - - /// HasNonInstructionUser - Set to true if this global has a user that is not - /// an instruction (e.g. a constant expr or GV initializer). - bool HasNonInstructionUser; - /// AtomicOrdering - Set to the strongest atomic ordering requirement.
AtomicOrdering Ordering; - GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), - StoredOnceValue(0), AccessingFunction(0), - HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), Ordering(NotAtomic) {} + GlobalStatus() + : isCompared(false), isLoaded(false), StoredType(NotStored), + StoredOnceValue(0), Ordering(NotAtomic) {} }; } @@ -195,21 +182,12 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, ++UI) { const User *U = *UI; if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { - GS.HasNonInstructionUser = true; - // If the result of the constantexpr isn't pointer type, then we won't // know to expect it in various places. Just reject early. if (!isa<PointerType>(CE->getType())) return true; if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(U)) { - if (!GS.HasMultipleAccessingFunctions) { - const Function *F = I->getParent()->getParent(); - if (GS.AccessingFunction == 0) - GS.AccessingFunction = F; - else if (GS.AccessingFunction != F) - GS.HasMultipleAccessingFunctions = true; - } if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { GS.isLoaded = true; // Don't hack on volatile loads. @@ -286,12 +264,10 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, return true; // Any other non-load instruction might take address! } } else if (const Constant *C = dyn_cast<Constant>(U)) { - GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) return true; } else { - GS.HasNonInstructionUser = true; // Otherwise must be some other user. return true; } @@ -1938,35 +1914,6 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, const GlobalStatus &GS) { - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive), we replace - // the global with a local alloca in this function. - // - // NOTE: It doesn't make sense to promote non single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction - ->getEntryBlock().begin()); - Type *ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); - if (!isa<UndefValue>(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); - GV->eraseFromParent(); - ++NumLocalized; - return true; - } - // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.isLoaded) { -- cgit v1.1 From dd5d86d992eb129ecd0bb013d2db2d6a0e8d2605 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 2 Oct 2013 15:42:23 +0000 Subject: Remove the very substantial, largely unmaintained legacy PGO infrastructure.
This was essentially work toward PGO based on a design that had several flaws, partially dating from a time when LLVM had a different architecture, and with an effort to modernize it abandoned without being completed. Since then, it has bitrotted for several years further. The result is nearly unusable, and isn't helping any of the modern PGO efforts. Instead, it is getting in the way, adding confusion about PGO in LLVM and distracting everyone with maintenance on essentially dead code. Removing it paves the way for modern efforts around PGO. Among other effects, this removes the last of the runtime libraries from LLVM. Those are being developed in the separate 'compiler-rt' project now, with somewhat different licensing specifically more appropriate for runtimes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191835 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/CMakeLists.txt | 3 - lib/Transforms/Instrumentation/EdgeProfiling.cpp | 117 -- lib/Transforms/Instrumentation/Instrumentation.cpp | 3 - .../Instrumentation/OptimalEdgeProfiling.cpp | 225 ---- lib/Transforms/Instrumentation/PathProfiling.cpp | 1424 -------------------- lib/Transforms/Scalar/CodeGenPrepare.cpp | 8 - lib/Transforms/Utils/BreakCriticalEdges.cpp | 9 +- lib/Transforms/Utils/Local.cpp | 6 - 8 files changed, 1 insertion(+), 1794 deletions(-) delete mode 100644 lib/Transforms/Instrumentation/EdgeProfiling.cpp delete mode 100644 lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp delete mode 100644 lib/Transforms/Instrumentation/PathProfiling.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 65d41f5..71a0ecd 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -3,12 +3,9 @@ add_llvm_library(LLVMInstrumentation BoundsChecking.cpp DataFlowSanitizer.cpp DebugIR.cpp - EdgeProfiling.cpp GCOVProfiling.cpp MemorySanitizer.cpp Instrumentation.cpp - OptimalEdgeProfiling.cpp - PathProfiling.cpp ProfilingUtils.cpp ThreadSanitizer.cpp ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp deleted file mode 100644 index a2459fb..0000000 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ /dev/null @@ -1,117 +0,0 @@ -//===- EdgeProfiling.cpp - Insert counters for edge profiling -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass instruments the specified program with counters for edge profiling. -// Edge profiling can give a reasonable approximation of the hot paths through a -// program, and is used for a wide variety of program transformations. -// -// Note that this implementation is very naive. We insert a counter for *every* -// edge in the program, instead of using control flow information to prune the -// number of counters inserted.
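As a conceptual sketch of what the deleted pass below emitted (hand-written C++ with illustrative names and a fixed array size, not the pass's actual output; the real pass created a module-level @EdgeProfCounters global sized to the number of edges):

    #include <cstdint>

    // One zero-initialized counter slot per CFG edge in the module.
    static uint32_t EdgeProfCounters[1024]; // illustrative fixed size

    // Conceptually inserted on every edge: in the source block when it has
    // a single successor, otherwise in the (possibly split) destination.
    static inline void countEdge(unsigned EdgeIdx) {
      ++EdgeProfCounters[EdgeIdx];
    }

The optimal variant removed later in this same commit avoided a counter on every edge by exploiting flow conservation over a maximum spanning tree.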
-// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "insert-edge-profiling" - -#include "llvm/Transforms/Instrumentation.h" -#include "ProfilingUtils.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <set> -using namespace llvm; - -STATISTIC(NumEdgesInserted, "The # of edges inserted."); - -namespace { - class EdgeProfiler : public ModulePass { - bool runOnModule(Module &M); - public: - static char ID; // Pass identification, replacement for typeid - EdgeProfiler() : ModulePass(ID) { - initializeEdgeProfilerPass(*PassRegistry::getPassRegistry()); - } - - virtual const char *getPassName() const { - return "Edge Profiler"; - } - }; -} - -char EdgeProfiler::ID = 0; -INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling", - "Insert instrumentation for edge profiling", false, false) - -ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } - -bool EdgeProfiler::runOnModule(Module &M) { - Function *Main = M.getFunction("main"); - if (Main == 0) { - errs() << "WARNING: cannot insert edge profiling into a module" - << " with no main function!\n"; - return false; // No main, no instrumentation! - } - - std::set<BasicBlock*> BlocksToInstrument; - unsigned NumEdges = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - // Reserve space for (0,entry) edge. - ++NumEdges; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - // Keep track of which blocks need to be instrumented. We don't want to - // instrument blocks that are added as the result of breaking critical - // edges! - BlocksToInstrument.insert(BB); - NumEdges += BB->getTerminator()->getNumSuccessors(); - } - } - - Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); - GlobalVariable *Counters = - new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "EdgeProfCounters"); - NumEdgesInserted = NumEdges; - - // Instrument all of the edges... - unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - // Create counter for (0,entry) edge. - IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks - // Okay, we have to add a counter of each outgoing edge. If the - // outgoing edge is not critical don't split it, just insert the counter - // in the source or destination of the edge. - TerminatorInst *TI = BB->getTerminator(); - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - // If the edge is critical, split it. - SplitCriticalEdge(TI, s, this); - - // Okay, we are guaranteed that the edge is no longer critical. If we - // only have a single successor, insert the counter in this block, - // otherwise insert it in the successor block. - if (TI->getNumSuccessors() == 1) { - // Insert counter at the start of the block - IncrementCounterInBlock(BB, i++, Counters, false); - } else { - // Insert counter at the start of the block - IncrementCounterInBlock(TI->getSuccessor(s), i++, Counters); - } - } - } - } - - // Add the initialization call to main.
- InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters); - return true; -} - diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 94f7901..b1bea38 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -24,10 +24,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerPass(Registry); initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); - initializeEdgeProfilerPass(Registry); initializeGCOVProfilerPass(Registry); - initializeOptimalEdgeProfilerPass(Registry); - initializePathProfilerPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); initializeDataFlowSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp deleted file mode 100644 index b45aef6..0000000 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ /dev/null @@ -1,225 +0,0 @@ -//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass instruments the specified program with counters for edge profiling. -// Edge profiling can give a reasonable approximation of the hot paths through a -// program, and is used for a wide variety of program transformations. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "insert-optimal-edge-profiling" -#include "llvm/Transforms/Instrumentation.h" -#include "MaximumSpanningTree.h" -#include "ProfilingUtils.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -using namespace llvm; - -STATISTIC(NumEdgesInserted, "The # of edges inserted."); - -namespace { - class OptimalEdgeProfiler : public ModulePass { - bool runOnModule(Module &M); - public: - static char ID; // Pass identification, replacement for typeid - OptimalEdgeProfiler() : ModulePass(ID) { - initializeOptimalEdgeProfilerPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(ProfileEstimatorPassID); - AU.addRequired<ProfileInfo>(); - } - - virtual const char *getPassName() const { - return "Optimal Edge Profiler"; - } - }; -} - -char OptimalEdgeProfiler::ID = 0; -INITIALIZE_PASS_BEGIN(OptimalEdgeProfiler, "insert-optimal-edge-profiling", - "Insert optimal instrumentation for edge profiling", - false, false) -INITIALIZE_PASS_DEPENDENCY(ProfileEstimatorPass) -INITIALIZE_AG_DEPENDENCY(ProfileInfo) -INITIALIZE_PASS_END(OptimalEdgeProfiler, "insert-optimal-edge-profiling", - "Insert optimal instrumentation for edge profiling", - false, false) - -ModulePass *llvm::createOptimalEdgeProfilerPass() { - return new OptimalEdgeProfiler(); -} - -inline static void printEdgeCounter(ProfileInfo::Edge e, - BasicBlock* b, - unsigned i) { - DEBUG(dbgs() << "--Edge
Counter for " << (e) << " in " \ - << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n"); -} - -bool OptimalEdgeProfiler::runOnModule(Module &M) { - Function *Main = M.getFunction("main"); - if (Main == 0) { - errs() << "WARNING: cannot insert edge profiling into a module" - << " with no main function!\n"; - return false; // No main, no instrumentation! - } - - // NumEdges counts all the edges that may be instrumented. Later on it is - // decided which edges to actually instrument, to achieve optimal profiling. - // For the entry block a virtual edge (0,entry) is reserved, for each block - // with no successors an edge (BB,0) is reserved. These edges are necessary - // to calculate a truly optimal maximum spanning tree and thus an optimal - // instrumentation. - unsigned NumEdges = 0; - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - // Reserve space for (0,entry) edge. - ++NumEdges; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - // Keep track of which blocks need to be instrumented. We don't want to - // instrument blocks that are added as the result of breaking critical - // edges! - if (BB->getTerminator()->getNumSuccessors() == 0) { - // Reserve space for (BB,0) edge. - ++NumEdges; - } else { - NumEdges += BB->getTerminator()->getNumSuccessors(); - } - } - } - - // In the profiling output a counter for each edge is reserved, but only few - // are used. This is done to be able to read back in the profile without - // calculating the maximum spanning tree again, instead each edge counter that - // is not used is initialised with -1 to signal that this edge counter has to - // be calculated from other edge counters on reading the profile info back - // in. - - Type *Int32 = Type::getInt32Ty(M.getContext()); - ArrayType *ATy = ArrayType::get(Int32, NumEdges); - GlobalVariable *Counters = - new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "OptEdgeProfCounters"); - NumEdgesInserted = 0; - - std::vector<Constant*> Initializer(NumEdges); - Constant *Zero = ConstantInt::get(Int32, 0); - Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); - - // Instrument all of the edges not in MST... - unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - - // Calculate a Maximum Spanning Tree with the edge weights determined by - // ProfileEstimator. ProfileEstimator also assigns weights to the virtual - // edges (0,entry) and (BB,0) (for blocks with no successors) and these - // edges also participate in the maximum spanning tree calculation. - // The third parameter of MaximumSpanningTree() has the effect that not the - // actual MST is returned but the edges _not_ in the MST. - - ProfileInfo::EdgeWeights ECs = - getAnalysis<ProfileInfo>(*F).getEdgeWeights(F); - std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); - MaximumSpanningTree<BasicBlock> MST(EdgeVector); - std::stable_sort(MST.begin(), MST.end()); - - // Check if (0,entry) not in the MST. If not, instrument edge - // (IncrementCounterInBlock()) and set the counter initially to zero, if - // the edge is in the MST the counter is initialised to -1.
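Why leaving the maximum-spanning-tree edges uncounted is sound, as a small illustrative sketch rather than anything from ProfileInfoLoader itself: execution counts obey flow conservation at every block, in-flow equals out-flow, so once the non-tree (chord) edges are counted, each tree edge's count can be solved for, working inward from the leaves of the spanning tree. A hypothetical helper showing one such step:

    #include <numeric>
    #include <vector>

    // Recover a block's single uncounted outgoing edge from its counted
    // incoming edges and its other counted outgoing edges. Hypothetical
    // code; the real loader performs the equivalent reconstruction when
    // reading a profile whose unused slots hold -1 (Uncounted).
    long long recoverMissingOutEdge(const std::vector<long long> &inCounts,
                                    const std::vector<long long> &outKnown) {
      long long in = std::accumulate(inCounts.begin(), inCounts.end(), 0LL);
      long long out = std::accumulate(outKnown.begin(), outKnown.end(), 0LL);
      return in - out; // flow conservation across the block
    }

The code below accordingly instruments only edges outside the MST and initialises every tree edge's reserved slot to -1.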
- - BasicBlock *entry = &(F->getEntryBlock()); - ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry); - if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge, entry, i); - IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; - Initializer[i++] = (Zero); - } else{ - Initializer[i++] = (Uncounted); - } - - // InsertedBlocks contains all blocks that were inserted for splitting an - // edge, these blocks do not have to be instrumented. - DenseSet<BasicBlock*> InsertedBlocks; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - // Check if block was not inserted and thus does not have to be - // instrumented. - if (InsertedBlocks.count(BB)) continue; - - // Okay, we have to add a counter of each outgoing edge not in MST. If - // the outgoing edge is not critical don't split it, just insert the - // counter in the source or destination of the edge. Also, if the block - // has no successors, the virtual edge (BB,0) is processed. - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0); - if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge, BB, i); - IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; - Initializer[i++] = (Zero); - } else{ - Initializer[i++] = (Uncounted); - } - } - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - BasicBlock *Succ = TI->getSuccessor(s); - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ); - if (!std::binary_search(MST.begin(), MST.end(), edge)) { - - // If the edge is critical, split it. - bool wasInserted = SplitCriticalEdge(TI, s, this); - Succ = TI->getSuccessor(s); - if (wasInserted) - InsertedBlocks.insert(Succ); - - // Okay, we are guaranteed that the edge is no longer critical. If - // we only have a single successor, insert the counter in this block, - // otherwise insert it in the successor block. - if (TI->getNumSuccessors() == 1) { - // Insert counter at the start of the block - printEdgeCounter(edge, BB, i); - IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; - } else { - // Insert counter at the start of the block - printEdgeCounter(edge, Succ, i); - IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; - } - Initializer[i++] = (Zero); - } else { - Initializer[i++] = (Uncounted); - } - } - } - } - - // Check if the number of edges counted at first was the number of edges we - // considered for instrumentation. - assert(i == NumEdges && "the number of edges in counting array is wrong"); - - // Assign the now completely defined initialiser to the array. - Constant *init = ConstantArray::get(ATy, Initializer); - Counters->setInitializer(init); - - // Add the initialization call to main. - InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters); - return true; -} - diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp deleted file mode 100644 index 7de7326..0000000 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ /dev/null @@ -1,1424 +0,0 @@ -//===- PathProfiling.cpp - Inserts counters for path profiling ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass instruments functions for Ball-Larus path profiling.
Ball-Larus -// profiling converts the CFG into a DAG by replacing backedges with edges -// from entry to the start block and from the end block to exit. The paths -// along the new DAG are enumerated, i.e. each path is given a path number. -// Edges are instrumented to increment the path number register, such that the -// path number register will equal the path number of the path taken at the -// exit. -// -// This file defines classes for building a CFG for use with different stages -// in the Ball-Larus path profiling instrumentation [Ball96]. The -// requirements are formatting the llvm CFG into the Ball-Larus DAG, path -// numbering, finding a spanning tree, moving increments from the spanning -// tree to chords. -// -// Terms: -// DAG - Directed Acyclic Graph. -// Ball-Larus DAG - A CFG with an entry node, an exit node, and backedges -// removed in the following manner. For every backedge -// v->w, insert edge ENTRY->w and edge v->EXIT. -// Path Number - The number corresponding to a specific path through a -// Ball-Larus DAG. -// Spanning Tree - A subgraph, S, is a spanning tree if S covers all -// vertices and is a tree. -// Chord - An edge not in the spanning tree. -// -// [Ball96] -// T. Ball and J. R. Larus. "Efficient Path Profiling." -// International Symposium on Microarchitecture, pages 46-57, 1996. -// http://portal.acm.org/citation.cfm?id=243857 -// -// [Ball94] -// Thomas Ball. "Efficiently Counting Program Events with Support for -// On-line queries." -// ACM Transactions on Programming Languages and Systems, Vol 16, No 5, -// September 1994, Pages 1399-1410. -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "insert-path-profiling" - -#include "llvm/Transforms/Instrumentation.h" -#include "ProfilingUtils.h" -#include "llvm/Analysis/PathNumbering.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/TypeBuilder.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <vector> - -#define HASH_THRESHHOLD 100000 - -using namespace llvm; - -namespace { -class BLInstrumentationNode; -class BLInstrumentationEdge; -class BLInstrumentationDag; - -// --------------------------------------------------------------------------- -// BLInstrumentationNode extends BallLarusNode with members used by the -// instrumentation algorithms. -// --------------------------------------------------------------------------- -class BLInstrumentationNode : public BallLarusNode { -public: - // Creates a new BLInstrumentationNode from a BasicBlock. - BLInstrumentationNode(BasicBlock* BB); - - // Get/sets the Value corresponding to the pathNumber register, - // constant or phinode. Used by the instrumentation code to remember - // path number Values. - Value* getStartingPathNumber(); - void setStartingPathNumber(Value* pathNumber); - - Value* getEndingPathNumber(); - void setEndingPathNumber(Value* pathNumber); - - // Get/set the PHINode Instruction for this node. - PHINode* getPathPHI(); - void setPathPHI(PHINode* pathPHI); - -private: - - Value* _startingPathNumber; // The Value for the current pathNumber. - Value* _endingPathNumber; // The Value for the current pathNumber.
- PHINode* _pathPHI; // The PHINode for current pathNumber. -}; - -// -------------------------------------------------------------------------- -// BLInstrumentationEdge extends BallLarusEdge with data about the -// instrumentation that will end up on each edge. -// -------------------------------------------------------------------------- -class BLInstrumentationEdge : public BallLarusEdge { -public: - BLInstrumentationEdge(BLInstrumentationNode* source, - BLInstrumentationNode* target); - - // Sets the target node of this edge. Required to split edges. - void setTarget(BallLarusNode* node); - - // Get/set whether edge is in the spanning tree. - bool isInSpanningTree() const; - void setIsInSpanningTree(bool isInSpanningTree); - - // Get/set whether this edge will be instrumented with a path number - // initialization. - bool isInitialization() const; - void setIsInitialization(bool isInitialization); - - // Get/set whether this edge will be instrumented with a path counter - // increment. Notice this is incrementing the path counter - // corresponding to the path number register. The path number - // increment is determined by getIncrement(). - bool isCounterIncrement() const; - void setIsCounterIncrement(bool isCounterIncrement); - - // Get/set the path number increment that this edge will be instrumented - // with. This is distinct from the path counter increment and the - // weight. The counter increment counts the number of executions of - // some path, whereas the path number keeps track of which path number - // the program is on. - long getIncrement() const; - void setIncrement(long increment); - - // Get/set whether the edge has been instrumented. - bool hasInstrumentation(); - void setHasInstrumentation(bool hasInstrumentation); - - // Returns the successor number of this edge in the source. - unsigned getSuccessorNumber(); - -private: - // The increment that the code will be instrumented with. - long long _increment; - - // Whether this edge is in the spanning tree. - bool _isInSpanningTree; - - // Whether this edge is an initialization of the path number. - bool _isInitialization; - - // Whether this edge is a path counter increment. - bool _isCounterIncrement; - - // Whether this edge has been instrumented. - bool _hasInstrumentation; -}; - -// --------------------------------------------------------------------------- -// BLInstrumentationDag extends BallLarusDag with algorithms that -// determine where instrumentation should be placed. -// --------------------------------------------------------------------------- -class BLInstrumentationDag : public BallLarusDag { -public: - BLInstrumentationDag(Function &F); - - // Returns the Exit->Root edge. This edge is required for creating - // directed cycles in the algorithm for moving instrumentation off of - // the spanning tree - BallLarusEdge* getExitRootEdge(); - - // Returns an array of phony edges which mark those nodes - // with function calls - BLEdgeVector getCallPhonyEdges(); - - // Gets/sets the path counter array - GlobalVariable* getCounterArray(); - void setCounterArray(GlobalVariable* c); - - // Calculates the increments for the chords, thereby removing - // instrumentation from the spanning tree edges. Implementation is based - // on the algorithm in Figure 4 of [Ball94] - void calculateChordIncrements(); - - // Updates the state when an edge has been split - void splitUpdate(BLInstrumentationEdge* formerEdge, BasicBlock* newBlock); - - // Calculates a spanning tree of the DAG ignoring cycles.
Whichever - // edges are in the spanning tree will not be instrumented, but this - // implementation does not try to minimize the instrumentation overhead - // by trying to find hot edges. - void calculateSpanningTree(); - - // Pushes initialization further down in order to group the first - // increment and initialization. - void pushInitialization(); - - // Pushes the path counter increments up in order to group the last path - // number increment. - void pushCounters(); - - // Removes phony edges from the successor list of the source, and the - // predecessor list of the target. - void unlinkPhony(); - - // Generate dot graph for the function - void generateDotGraph(); - -protected: - // BLInstrumentationDag creates BLInstrumentationNode objects in this - // method overriding the creation of BallLarusNode objects. - // - // Allows subclasses to determine which type of Node is created. - // Override this method to produce subclasses of BallLarusNode if - // necessary. - virtual BallLarusNode* createNode(BasicBlock* BB); - - // BLInstrumentationDag creates BLInstrumentationEdges. - // - // Allows subclasses to determine which type of Edge is created. - // Override this method to produce subclasses of BallLarusEdge if - // necessary. Parameters source and target will have been created by - // createNode and can be cast to the subclass of BallLarusNode* - // returned by createNode. - virtual BallLarusEdge* createEdge( - BallLarusNode* source, BallLarusNode* target, unsigned edgeNumber); - -private: - BLEdgeVector _treeEdges; // All edges in the spanning tree. - BLEdgeVector _chordEdges; // All edges not in the spanning tree. - GlobalVariable* _counterArray; // Array to store path counters - - // Removes the edge from the appropriate predecessor and successor lists. - void unlinkEdge(BallLarusEdge* edge); - - // Makes an edge part of the spanning tree. - void makeEdgeSpanning(BLInstrumentationEdge* edge); - - // Pushes initialization and calls itself recursively. - void pushInitializationFromEdge(BLInstrumentationEdge* edge); - - // Pushes path counter increments up recursively. - void pushCountersFromEdge(BLInstrumentationEdge* edge); - - // Depth first algorithm for determining the chord increments. - void calculateChordIncrementsDfs( - long weight, BallLarusNode* v, BallLarusEdge* e); - - // Determines the relative direction of two edges. - int calculateChordIncrementsDir(BallLarusEdge* e, BallLarusEdge* f); -}; - -// --------------------------------------------------------------------------- -// PathProfiler is a module pass which instruments path profiling instructions -// --------------------------------------------------------------------------- -class PathProfiler : public ModulePass { -private: - // Current context for multi threading support. - LLVMContext* Context; - - // Which function are we currently instrumenting - unsigned currentFunctionNumber; - - // The function prototype in the profiling runtime for incrementing a - // single path counter in a hash table. - Constant* llvmIncrementHashFunction; - Constant* llvmDecrementHashFunction; - - // Instruments each function with path profiling. 'main' is instrumented - // with code to save the profile to disk. - bool runOnModule(Module &M); - - // Analyzes the function for Ball-Larus path profiling, and inserts code. - void runOnFunction(std::vector<Constant*> &ftInit, Function &F, Module &M); - - // Creates an increment constant representing incr.
- ConstantInt* createIncrementConstant(long incr, int bitsize); - - // Creates an increment constant representing the value in - // edge->getIncrement(). - ConstantInt* createIncrementConstant(BLInstrumentationEdge* edge); - - // Finds the insertion point after pathNumber in block. PathNumber may - // be NULL. - BasicBlock::iterator getInsertionPoint( - BasicBlock* block, Value* pathNumber); - - // Inserts source's pathNumber Value* into target. Target may or may not - // have multiple predecessors, and may or may not have its phiNode - // initialized. - void pushValueIntoNode( - BLInstrumentationNode* source, BLInstrumentationNode* target); - - // Inserts source's pathNumber Value* into the appropriate slot of - // target's phiNode. - void pushValueIntoPHI( - BLInstrumentationNode* target, BLInstrumentationNode* source); - - // The Value* in node, oldVal, is updated with a Value* corresponding to - // oldVal + addition. - void insertNumberIncrement(BLInstrumentationNode* node, Value* addition, - bool atBeginning); - - // Creates a counter increment in the given node. The Value* in node is - // taken as the index into a hash table. - void insertCounterIncrement( - Value* incValue, - BasicBlock::iterator insertPoint, - BLInstrumentationDag* dag, - bool increment = true); - - // A PHINode is created in the node, and its values initialized to -1U. - void preparePHI(BLInstrumentationNode* node); - - // Inserts instrumentation for the given edge - // - // Pre: The edge's source node has pathNumber set if edge is non zero - // path number increment. - // - // Post: Edge's target node has a pathNumber set to the path number Value - // corresponding to the value of the path register after edge's - // execution. - void insertInstrumentationStartingAt( - BLInstrumentationEdge* edge, - BLInstrumentationDag* dag); - - // If this edge is a critical edge, then inserts a node at this edge. - // This edge becomes the first edge, and a new BallLarusEdge is created. - bool splitCritical(BLInstrumentationEdge* edge, BLInstrumentationDag* dag); - - // Inserts instrumentation according to the marked edges in dag. Phony - // edges must be unlinked from the DAG, but accessible from the - // backedges. Dag must have initializations, path number increments, and - // counter increments present. - // - // Counter storage is created here.
- void insertInstrumentation( BLInstrumentationDag& dag, Module &M); - -public: - static char ID; // Pass identification, replacement for typeid - PathProfiler() : ModulePass(ID) { - initializePathProfilerPass(*PassRegistry::getPassRegistry()); - } - - virtual const char *getPassName() const { - return "Path Profiler"; - } -}; -} // end anonymous namespace - -// Should we print the dot-graphs -static cl::opt<bool> DotPathDag("path-profile-pathdag", cl::Hidden, - cl::desc("Output the path profiling DAG for each function.")); - -// Register the path profiler as a pass -char PathProfiler::ID = 0; -INITIALIZE_PASS(PathProfiler, "insert-path-profiling", - "Insert instrumentation for Ball-Larus path profiling", - false, false) - -ModulePass *llvm::createPathProfilerPass() { return new PathProfiler(); } - -namespace llvm { - class PathProfilingFunctionTable {}; - - // Type for global array storing references to hashes or arrays - template<bool xcompile> class TypeBuilder<PathProfilingFunctionTable, xcompile> { - public: - static StructType *get(LLVMContext& C) { - return( StructType::get( - TypeBuilder<types::i<32>, xcompile>::get(C), // type - TypeBuilder<types::i<32>, xcompile>::get(C), // array size - TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr - NULL)); - } - }; - - typedef TypeBuilder<PathProfilingFunctionTable, true> - ftEntryTypeBuilder; - - // BallLarusEdge << operator overloading - raw_ostream& operator<<(raw_ostream& os, - const BLInstrumentationEdge& edge) - LLVM_ATTRIBUTE_USED; - raw_ostream& operator<<(raw_ostream& os, - const BLInstrumentationEdge& edge) { - os << "[" << edge.getSource()->getName() << " -> " - << edge.getTarget()->getName() << "] init: " - << (edge.isInitialization() ? "yes" : "no") - << " incr:" << edge.getIncrement() << " cinc: " - << (edge.isCounterIncrement() ? "yes" : "no"); - return(os); - } -} - -// Creates a new BLInstrumentationNode from a BasicBlock. -BLInstrumentationNode::BLInstrumentationNode(BasicBlock* BB) : - BallLarusNode(BB), - _startingPathNumber(NULL), _endingPathNumber(NULL), _pathPHI(NULL) {} - -// Constructor for BLInstrumentationEdge. -BLInstrumentationEdge::BLInstrumentationEdge(BLInstrumentationNode* source, - BLInstrumentationNode* target) - : BallLarusEdge(source, target, 0), - _increment(0), _isInSpanningTree(false), _isInitialization(false), - _isCounterIncrement(false), _hasInstrumentation(false) {} - -// Sets the target node of this edge. Required to split edges. -void BLInstrumentationEdge::setTarget(BallLarusNode* node) { - _target = node; -} - -// Returns whether this edge is in the spanning tree. -bool BLInstrumentationEdge::isInSpanningTree() const { - return(_isInSpanningTree); -} - -// Sets whether this edge is in the spanning tree. -void BLInstrumentationEdge::setIsInSpanningTree(bool isInSpanningTree) { - _isInSpanningTree = isInSpanningTree; -} - -// Returns whether this edge will be instrumented with a path number -// initialization. -bool BLInstrumentationEdge::isInitialization() const { - return(_isInitialization); -} - -// Sets whether this edge will be instrumented with a path number -// initialization. -void BLInstrumentationEdge::setIsInitialization(bool isInitialization) { - _isInitialization = isInitialization; -} - -// Returns whether this edge will be instrumented with a path counter -// increment. Notice this is incrementing the path counter -// corresponding to the path number register. The path number -// increment is determined by getIncrement(). -bool BLInstrumentationEdge::isCounterIncrement() const { - return(_isCounterIncrement); -} - -// Sets whether this edge will be instrumented with a path counter -// increment.
-void BLInstrumentationEdge::setIsCounterIncrement(bool isCounterIncrement) { - _isCounterIncrement = isCounterIncrement; -} - -// Gets the path number increment that this edge will be instrumented -// with. This is distinct from the path counter increment and the -// weight. The counter increment counts the number of executions of -// some path, whereas the path number keeps track of which path number -// the program is on. -long BLInstrumentationEdge::getIncrement() const { - return(_increment); -} - -// Set whether this edge will be instrumented with a path number -// increment. -void BLInstrumentationEdge::setIncrement(long increment) { - _increment = increment; -} - -// True iff the edge has already been instrumented. -bool BLInstrumentationEdge::hasInstrumentation() { - return(_hasInstrumentation); -} - -// Set whether this edge has been instrumented. -void BLInstrumentationEdge::setHasInstrumentation(bool hasInstrumentation) { - _hasInstrumentation = hasInstrumentation; -} - -// Returns the successor number of this edge in the source. -unsigned BLInstrumentationEdge::getSuccessorNumber() { - BallLarusNode* sourceNode = getSource(); - BallLarusNode* targetNode = getTarget(); - BasicBlock* source = sourceNode->getBlock(); - BasicBlock* target = targetNode->getBlock(); - - if(source == NULL || target == NULL) - return(0); - - TerminatorInst* terminator = source->getTerminator(); - - unsigned i; - for(i=0; i < terminator->getNumSuccessors(); i++) { - if(terminator->getSuccessor(i) == target) - break; - } - - return(i); -} - -// BLInstrumentationDag constructor initializes a DAG for the given Function. -BLInstrumentationDag::BLInstrumentationDag(Function &F) : BallLarusDag(F), - _counterArray(0) { -} - -// Returns the Exit->Root edge. This edge is required for creating -// directed cycles in the algorithm for moving instrumentation off of -// the spanning tree -BallLarusEdge* BLInstrumentationDag::getExitRootEdge() { - BLEdgeIterator erEdge = getExit()->succBegin(); - return(*erEdge); -} - -BLEdgeVector BLInstrumentationDag::getCallPhonyEdges () { - BLEdgeVector callEdges; - - for( BLEdgeIterator edge = _edges.begin(), end = _edges.end(); - edge != end; edge++ ) { - if( (*edge)->getType() == BallLarusEdge::CALLEDGE_PHONY ) - callEdges.push_back(*edge); - } - - return callEdges; -} - -// Gets the path counter array -GlobalVariable* BLInstrumentationDag::getCounterArray() { - return _counterArray; -} - -void BLInstrumentationDag::setCounterArray(GlobalVariable* c) { - _counterArray = c; -} - -// Calculates the increment for the chords, thereby removing -// instrumentation from the spanning tree edges.
Implementation is based on -// the algorithm in Figure 4 of [Ball94] -void BLInstrumentationDag::calculateChordIncrements() { - calculateChordIncrementsDfs(0, getRoot(), NULL); - - BLInstrumentationEdge* chord; - for(BLEdgeIterator chordEdge = _chordEdges.begin(), - end = _chordEdges.end(); chordEdge != end; chordEdge++) { - chord = (BLInstrumentationEdge*) *chordEdge; - chord->setIncrement(chord->getIncrement() + chord->getWeight()); - } -} - -// Updates the state when an edge has been split -void BLInstrumentationDag::splitUpdate(BLInstrumentationEdge* formerEdge, - BasicBlock* newBlock) { - BallLarusNode* oldTarget = formerEdge->getTarget(); - BallLarusNode* newNode = addNode(newBlock); - formerEdge->setTarget(newNode); - newNode->addPredEdge(formerEdge); - - DEBUG(dbgs() << " Edge split: " << *formerEdge << "\n"); - - oldTarget->removePredEdge(formerEdge); - BallLarusEdge* newEdge = addEdge(newNode, oldTarget,0); - - if( formerEdge->getType() == BallLarusEdge::BACKEDGE || - formerEdge->getType() == BallLarusEdge::SPLITEDGE) { - newEdge->setType(formerEdge->getType()); - newEdge->setPhonyRoot(formerEdge->getPhonyRoot()); - newEdge->setPhonyExit(formerEdge->getPhonyExit()); - formerEdge->setType(BallLarusEdge::NORMAL); - formerEdge->setPhonyRoot(NULL); - formerEdge->setPhonyExit(NULL); - } -} - -// Calculates a spanning tree of the DAG ignoring cycles. Whichever -// edges are in the spanning tree will not be instrumented, but this -// implementation does not try to minimize the instrumentation overhead -// by trying to find hot edges. -void BLInstrumentationDag::calculateSpanningTree() { - std::stack<BallLarusNode*> dfsStack; - - for(BLNodeIterator nodeIt = _nodes.begin(), end = _nodes.end(); - nodeIt != end; nodeIt++) { - (*nodeIt)->setColor(BallLarusNode::WHITE); - } - - dfsStack.push(getRoot()); - while(dfsStack.size() > 0) { - BallLarusNode* node = dfsStack.top(); - dfsStack.pop(); - - if(node->getColor() == BallLarusNode::WHITE) - continue; - - BallLarusNode* nextNode; - bool forward = true; - BLEdgeIterator succEnd = node->succEnd(); - - node->setColor(BallLarusNode::WHITE); - // first iterate over successors then predecessors - for(BLEdgeIterator edge = node->succBegin(), predEnd = node->predEnd(); - edge != predEnd; edge++) { - if(edge == succEnd) { - edge = node->predBegin(); - forward = false; - } - - // Ignore split edges - if ((*edge)->getType() == BallLarusEdge::SPLITEDGE) - continue; - - nextNode = forward? (*edge)->getTarget(): (*edge)->getSource(); - if(nextNode->getColor() != BallLarusNode::WHITE) { - nextNode->setColor(BallLarusNode::WHITE); - makeEdgeSpanning((BLInstrumentationEdge*)(*edge)); - } - } - } - - for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); - edge != end; edge++) { - BLInstrumentationEdge* instEdge = (BLInstrumentationEdge*) (*edge); - // safe since createEdge is overridden - if(!instEdge->isInSpanningTree() && (*edge)->getType() - != BallLarusEdge::SPLITEDGE) - _chordEdges.push_back(instEdge); - } -} - -// Pushes initialization further down in order to group the first -// increment and initialization. -void BLInstrumentationDag::pushInitialization() { - BLInstrumentationEdge* exitRootEdge = - (BLInstrumentationEdge*) getExitRootEdge(); - exitRootEdge->setIsInitialization(true); - pushInitializationFromEdge(exitRootEdge); -} - -// Pushes the path counter increments up in order to group the last path -// number increment.
-void BLInstrumentationDag::pushCounters() { - BLInstrumentationEdge* exitRootEdge = - (BLInstrumentationEdge*) getExitRootEdge(); - exitRootEdge->setIsCounterIncrement(true); - pushCountersFromEdge(exitRootEdge); -} - -// Removes phony edges from the successor list of the source, and the -// predecessor list of the target. -void BLInstrumentationDag::unlinkPhony() { - BallLarusEdge* edge; - - for(BLEdgeIterator next = _edges.begin(), - end = _edges.end(); next != end; next++) { - edge = (*next); - - if( edge->getType() == BallLarusEdge::BACKEDGE_PHONY || - edge->getType() == BallLarusEdge::SPLITEDGE_PHONY || - edge->getType() == BallLarusEdge::CALLEDGE_PHONY ) { - unlinkEdge(edge); - } - } -} - -// Generate a .dot graph to represent the DAG and pathNumbers -void BLInstrumentationDag::generateDotGraph() { - std::string errorInfo; - std::string functionName = getFunction().getName().str(); - std::string filename = "pathdag." + functionName + ".dot"; - - DEBUG (dbgs() << "Writing '" << filename << "'...\n"); - raw_fd_ostream dotFile(filename.c_str(), errorInfo); - - if (!errorInfo.empty()) { - errs() << "Error opening '" << filename.c_str() <<"' for writing!"; - errs() << "\n"; - return; - } - - dotFile << "digraph " << functionName << " {\n"; - - for( BLEdgeIterator edge = _edges.begin(), end = _edges.end(); - edge != end; edge++) { - std::string sourceName = (*edge)->getSource()->getName(); - std::string targetName = (*edge)->getTarget()->getName(); - - dotFile << "\t\"" << sourceName.c_str() << "\" -> \"" - << targetName.c_str() << "\" "; - - long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement(); - - switch( (*edge)->getType() ) { - case BallLarusEdge::NORMAL: - dotFile << "[label=" << inc << "] [color=black];\n"; - break; - - case BallLarusEdge::BACKEDGE: - dotFile << "[color=cyan];\n"; - break; - - case BallLarusEdge::BACKEDGE_PHONY: - dotFile << "[label=" << inc - << "] [color=blue];\n"; - break; - - case BallLarusEdge::SPLITEDGE: - dotFile << "[color=violet];\n"; - break; - - case BallLarusEdge::SPLITEDGE_PHONY: - dotFile << "[label=" << inc << "] [color=red];\n"; - break; - - case BallLarusEdge::CALLEDGE_PHONY: - dotFile << "[label=" << inc << "] [color=green];\n"; - break; - } - } - - dotFile << "}\n"; -} - -// Allows subclasses to determine which type of Node is created. -// Override this method to produce subclasses of BallLarusNode if -// necessary. The destructor of BallLarusDag will call free on each pointer -// created. -BallLarusNode* BLInstrumentationDag::createNode(BasicBlock* BB) { - return( new BLInstrumentationNode(BB) ); -} - -// Allows subclasses to determine which type of Edge is created. -// Override this method to produce subclasses of BallLarusEdge if -// necessary. The destructor of BallLarusDag will call free on each pointer -// created. -BallLarusEdge* BLInstrumentationDag::createEdge(BallLarusNode* source, - BallLarusNode* target, unsigned edgeNumber) { - // One can cast from BallLarusNode to BLInstrumentationNode since createNode - // is overridden to produce BLInstrumentationNode. - return( new BLInstrumentationEdge((BLInstrumentationNode*)source, - (BLInstrumentationNode*)target) ); -} - -// Gets the Value corresponding to the pathNumber register, constant, -// or phinode. Used by the instrumentation code to remember path -// number Values. -Value* BLInstrumentationNode::getStartingPathNumber(){ - return(_startingPathNumber); -} - -// Sets the Value of the pathNumber. Used by the instrumentation code.
-void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) { - DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ? - pathNumber->getName() : - "unused") << "\n"); - _startingPathNumber = pathNumber; -} - -Value* BLInstrumentationNode::getEndingPathNumber(){ - return(_endingPathNumber); -} - -void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) { - DEBUG(dbgs() << " EPN-" << getName() << " <-- " - << (pathNumber ? pathNumber->getName() : "unused") << "\n"); - _endingPathNumber = pathNumber; -} - -// Get the PHINode Instruction for this node. Used by instrumentation -// code. -PHINode* BLInstrumentationNode::getPathPHI() { - return(_pathPHI); -} - -// Set the PHINode Instruction for this node. Used by instrumentation -// code. -void BLInstrumentationNode::setPathPHI(PHINode* pathPHI) { - _pathPHI = pathPHI; -} - -// Removes the edge from the appropriate predecessor and successor -// lists. -void BLInstrumentationDag::unlinkEdge(BallLarusEdge* edge) { - if(edge == getExitRootEdge()) - DEBUG(dbgs() << " Removing exit->root edge\n"); - - edge->getSource()->removeSuccEdge(edge); - edge->getTarget()->removePredEdge(edge); -} - -// Makes an edge part of the spanning tree. -void BLInstrumentationDag::makeEdgeSpanning(BLInstrumentationEdge* edge) { - edge->setIsInSpanningTree(true); - _treeEdges.push_back(edge); -} - -// Pushes initialization and calls itself recursively. -void BLInstrumentationDag::pushInitializationFromEdge( - BLInstrumentationEdge* edge) { - BallLarusNode* target; - - target = edge->getTarget(); - if( target->getNumberPredEdges() > 1 || target == getExit() ) { - return; - } else { - for(BLEdgeIterator next = target->succBegin(), - end = target->succEnd(); next != end; next++) { - BLInstrumentationEdge* intoEdge = (BLInstrumentationEdge*) *next; - - // Skip split edges - if (intoEdge->getType() == BallLarusEdge::SPLITEDGE) - continue; - - intoEdge->setIncrement(intoEdge->getIncrement() + - edge->getIncrement()); - intoEdge->setIsInitialization(true); - pushInitializationFromEdge(intoEdge); - } - - edge->setIncrement(0); - edge->setIsInitialization(false); - } -} - -// Pushes path counter increments up recursively. -void BLInstrumentationDag::pushCountersFromEdge(BLInstrumentationEdge* edge) { - BallLarusNode* source; - - source = edge->getSource(); - if(source->getNumberSuccEdges() > 1 || source == getRoot() - || edge->isInitialization()) { - return; - } else { - for(BLEdgeIterator previous = source->predBegin(), - end = source->predEnd(); previous != end; previous++) { - BLInstrumentationEdge* fromEdge = (BLInstrumentationEdge*) *previous; - - // Skip split edges - if (fromEdge->getType() == BallLarusEdge::SPLITEDGE) - continue; - - fromEdge->setIncrement(fromEdge->getIncrement() + - edge->getIncrement()); - fromEdge->setIsCounterIncrement(true); - pushCountersFromEdge(fromEdge); - } - - edge->setIncrement(0); - edge->setIsCounterIncrement(false); - } -} - -// Depth first algorithm for determining the chord increments. 
-void BLInstrumentationDag::calculateChordIncrementsDfs(long weight, - BallLarusNode* v, BallLarusEdge* e) { - BLInstrumentationEdge* f; - - for(BLEdgeIterator treeEdge = _treeEdges.begin(), - end = _treeEdges.end(); treeEdge != end; treeEdge++) { - f = (BLInstrumentationEdge*) *treeEdge; - if(e != f && v == f->getTarget()) { - calculateChordIncrementsDfs( - calculateChordIncrementsDir(e,f)*(weight) + - f->getWeight(), f->getSource(), f); - } - if(e != f && v == f->getSource()) { - calculateChordIncrementsDfs( - calculateChordIncrementsDir(e,f)*(weight) + - f->getWeight(), f->getTarget(), f); - } - } - - for(BLEdgeIterator chordEdge = _chordEdges.begin(), - end = _chordEdges.end(); chordEdge != end; chordEdge++) { - f = (BLInstrumentationEdge*) *chordEdge; - if(v == f->getSource() || v == f->getTarget()) { - f->setIncrement(f->getIncrement() + - calculateChordIncrementsDir(e,f)*weight); - } - } -} - -// Determines the relative direction of two edges. -int BLInstrumentationDag::calculateChordIncrementsDir(BallLarusEdge* e, - BallLarusEdge* f) { - if( e == NULL) - return(1); - else if(e->getSource() == f->getTarget() - || e->getTarget() == f->getSource()) - return(1); - - return(-1); -} - -// Creates an increment constant representing incr. -ConstantInt* PathProfiler::createIncrementConstant(long incr, - int bitsize) { - return(ConstantInt::get(IntegerType::get(*Context, 32), incr)); -} - -// Creates an increment constant representing the value in -// edge->getIncrement(). -ConstantInt* PathProfiler::createIncrementConstant( - BLInstrumentationEdge* edge) { - return(createIncrementConstant(edge->getIncrement(), 32)); -} - -// Finds the insertion point after pathNumber in block. PathNumber may -// be NULL. -BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* - pathNumber) { - if(pathNumber == NULL || isa<ConstantInt>(pathNumber) - || (((Instruction*)(pathNumber))->getParent()) != block) { - return(block->getFirstInsertionPt()); - } else { - Instruction* pathNumberInst = (Instruction*) (pathNumber); - BasicBlock::iterator insertPoint; - BasicBlock::iterator end = block->end(); - - for(insertPoint = block->begin(); - insertPoint != end; insertPoint++) { - Instruction* insertInst = &(*insertPoint); - - if(insertInst == pathNumberInst) - return(++insertPoint); - } - - return(insertPoint); - } -} - -// A PHINode is created in the node, and its values initialized to -1U. -void PathProfiler::preparePHI(BLInstrumentationNode* node) { - BasicBlock* block = node->getBlock(); - BasicBlock::iterator insertPoint = block->getFirstInsertionPt(); - pred_iterator PB = pred_begin(node->getBlock()), - PE = pred_end(node->getBlock()); - PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), - std::distance(PB, PE), "pathNumber", - insertPoint ); - node->setPathPHI(phi); - node->setStartingPathNumber(phi); - node->setEndingPathNumber(phi); - - for(pred_iterator predIt = PB; predIt != PE; predIt++) { - BasicBlock* pred = (*predIt); - - if(pred != NULL) - phi->addIncoming(createIncrementConstant((long)-1, 32), pred); - } -} - -// Inserts source's pathNumber Value* into target. Target may or may not -// have multiple predecessors, and may or may not have its phiNode -// initialized.
-void PathProfiler::pushValueIntoNode(BLInstrumentationNode* source, - BLInstrumentationNode* target) { - if(target->getBlock() == NULL) - return; - - - if(target->getNumberPredEdges() <= 1) { - assert(target->getStartingPathNumber() == NULL && - "Target already has path number"); - target->setStartingPathNumber(source->getEndingPathNumber()); - target->setEndingPathNumber(source->getEndingPathNumber()); - DEBUG(dbgs() << " Passing path number" - << (source->getEndingPathNumber() ? "" : " (null)") - << " value through.\n"); - } else { - if(target->getPathPHI() == NULL) { - DEBUG(dbgs() << " Initializing PHI node for block '" - << target->getName() << "'\n"); - preparePHI(target); - } - pushValueIntoPHI(target, source); - DEBUG(dbgs() << " Passing number value into PHI for block '" - << target->getName() << "'\n"); - } -} - -// Inserts source's pathNumber Value* into the appropriate slot of -// target's phiNode. -void PathProfiler::pushValueIntoPHI(BLInstrumentationNode* target, - BLInstrumentationNode* source) { - PHINode* phi = target->getPathPHI(); - assert(phi != NULL && " Tried to push value into node with PHI, but node" - " actually had no PHI."); - phi->removeIncomingValue(source->getBlock(), false); - phi->addIncoming(source->getEndingPathNumber(), source->getBlock()); -} - -// The Value* in node, oldVal, is updated with a Value* correspodning to -// oldVal + addition. -void PathProfiler::insertNumberIncrement(BLInstrumentationNode* node, - Value* addition, bool atBeginning) { - BasicBlock* block = node->getBlock(); - assert(node->getStartingPathNumber() != NULL); - assert(node->getEndingPathNumber() != NULL); - - BasicBlock::iterator insertPoint; - - if( atBeginning ) - insertPoint = block->getFirstInsertionPt(); - else - insertPoint = block->getTerminator(); - - DEBUG(errs() << " Creating addition instruction.\n"); - Value* newpn = BinaryOperator::Create(Instruction::Add, - node->getStartingPathNumber(), - addition, "pathNumber", insertPoint); - - node->setEndingPathNumber(newpn); - - if( atBeginning ) - node->setStartingPathNumber(newpn); -} - -// Creates a counter increment in the given node. The Value* in node is -// taken as the index into an array or hash table. The hash table access -// is a call to the runtime. 
-void PathProfiler::insertCounterIncrement(Value* incValue, - BasicBlock::iterator insertPoint, - BLInstrumentationDag* dag, - bool increment) { - // Counter increment for array - if( dag->getNumberOfPaths() <= HASH_THRESHHOLD ) { - // Get pointer to the array location - std::vector gepIndices(2); - gepIndices[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); - gepIndices[1] = incValue; - - GetElementPtrInst* pcPointer = - GetElementPtrInst::Create(dag->getCounterArray(), gepIndices, - "counterInc", insertPoint); - - // Load from the array - call it oldPC - LoadInst* oldPc = new LoadInst(pcPointer, "oldPC", insertPoint); - - // Test to see whether adding 1 will overflow the counter - ICmpInst* isMax = new ICmpInst(insertPoint, CmpInst::ICMP_ULT, oldPc, - createIncrementConstant(0xffffffff, 32), - "isMax"); - - // Select increment for the path counter based on overflow - SelectInst* inc = - SelectInst::Create( isMax, createIncrementConstant(increment?1:-1,32), - createIncrementConstant(0,32), - "pathInc", insertPoint); - - // newPc = oldPc + inc - BinaryOperator* newPc = BinaryOperator::Create(Instruction::Add, - oldPc, inc, "newPC", - insertPoint); - - // Store back in to the array - new StoreInst(newPc, pcPointer, insertPoint); - } else { // Counter increment for hash - std::vector args(2); - args[0] = ConstantInt::get(Type::getInt32Ty(*Context), - currentFunctionNumber); - args[1] = incValue; - - CallInst::Create( - increment ? llvmIncrementHashFunction : llvmDecrementHashFunction, - args, "", insertPoint); - } -} - -// Inserts instrumentation for the given edge -// -// Pre: The edge's source node has pathNumber set if edge is non zero -// path number increment. -// -// Post: Edge's target node has a pathNumber set to the path number Value -// corresponding to the value of the path register after edge's -// execution. -// -// FIXME: This should be reworked so it's not recursive. -void PathProfiler::insertInstrumentationStartingAt(BLInstrumentationEdge* edge, - BLInstrumentationDag* dag) { - // Mark the edge as instrumented - edge->setHasInstrumentation(true); - DEBUG(dbgs() << "\nInstrumenting edge: " << (*edge) << "\n"); - - // create a new node for this edge's instrumentation - splitCritical(edge, dag); - - BLInstrumentationNode* sourceNode = (BLInstrumentationNode*)edge->getSource(); - BLInstrumentationNode* targetNode = (BLInstrumentationNode*)edge->getTarget(); - BLInstrumentationNode* instrumentNode; - BLInstrumentationNode* nextSourceNode; - - bool atBeginning = false; - - // Source node has only 1 successor so any information can be simply - // inserted in to it without splitting - if( sourceNode->getBlock() && sourceNode->getNumberSuccEdges() <= 1) { - DEBUG(dbgs() << " Potential instructions to be placed in: " - << sourceNode->getName() << " (at end)\n"); - instrumentNode = sourceNode; - nextSourceNode = targetNode; // ... since we never made any new nodes - } - - // The target node only has one predecessor, so we can safely insert edge - // instrumentation into it. If there was splitting, it must have been - // successful. - else if( targetNode->getNumberPredEdges() == 1 ) { - DEBUG(dbgs() << " Potential instructions to be placed in: " - << targetNode->getName() << " (at beginning)\n"); - pushValueIntoNode(sourceNode, targetNode); - instrumentNode = targetNode; - nextSourceNode = NULL; // ... otherwise we'll just keep splitting - atBeginning = true; - } - - // Somehow, splitting must have failed. 
- else { - errs() << "Instrumenting could not split a critical edge.\n"; - DEBUG(dbgs() << " Couldn't split edge " << (*edge) << ".\n"); - return; - } - - // Insert instrumentation if this is a back or split edge - if( edge->getType() == BallLarusEdge::BACKEDGE || - edge->getType() == BallLarusEdge::SPLITEDGE ) { - BLInstrumentationEdge* top = - (BLInstrumentationEdge*) edge->getPhonyRoot(); - BLInstrumentationEdge* bottom = - (BLInstrumentationEdge*) edge->getPhonyExit(); - - assert( top->isInitialization() && " Top phony edge did not" - " contain a path number initialization."); - assert( bottom->isCounterIncrement() && " Bottom phony edge" - " did not contain a path counter increment."); - - // split edge has yet to be initialized - if( !instrumentNode->getEndingPathNumber() ) { - instrumentNode->setStartingPathNumber(createIncrementConstant(0,32)); - instrumentNode->setEndingPathNumber(createIncrementConstant(0,32)); - } - - BasicBlock::iterator insertPoint = atBeginning ? - instrumentNode->getBlock()->getFirstInsertionPt() : - instrumentNode->getBlock()->getTerminator(); - - // add information from the bottom edge, if it exists - if( bottom->getIncrement() ) { - Value* newpn = - BinaryOperator::Create(Instruction::Add, - instrumentNode->getStartingPathNumber(), - createIncrementConstant(bottom), - "pathNumber", insertPoint); - instrumentNode->setEndingPathNumber(newpn); - } - - insertCounterIncrement(instrumentNode->getEndingPathNumber(), - insertPoint, dag); - - if( atBeginning ) - instrumentNode->setStartingPathNumber(createIncrementConstant(top)); - - instrumentNode->setEndingPathNumber(createIncrementConstant(top)); - - // Check for path counter increments - if( top->isCounterIncrement() ) { - insertCounterIncrement(instrumentNode->getEndingPathNumber(), - instrumentNode->getBlock()->getTerminator(),dag); - instrumentNode->setEndingPathNumber(0); - } - } - - // Insert instrumentation if this is a normal edge - else { - BasicBlock::iterator insertPoint = atBeginning ? - instrumentNode->getBlock()->getFirstInsertionPt() : - instrumentNode->getBlock()->getTerminator(); - - if( edge->isInitialization() ) { // initialize path number - instrumentNode->setEndingPathNumber(createIncrementConstant(edge)); - } else if( edge->getIncrement() ) {// increment path number - Value* newpn = - BinaryOperator::Create(Instruction::Add, - instrumentNode->getStartingPathNumber(), - createIncrementConstant(edge), - "pathNumber", insertPoint); - instrumentNode->setEndingPathNumber(newpn); - - if( atBeginning ) - instrumentNode->setStartingPathNumber(newpn); - } - - // Check for path counter increments - if( edge->isCounterIncrement() ) { - insertCounterIncrement(instrumentNode->getEndingPathNumber(), - insertPoint, dag); - instrumentNode->setEndingPathNumber(0); - } - } - - // Push it along - if (nextSourceNode && instrumentNode->getEndingPathNumber()) - pushValueIntoNode(instrumentNode, nextSourceNode); - - // Add all the successors - for( BLEdgeIterator next = targetNode->succBegin(), - end = targetNode->succEnd(); next != end; next++ ) { - // So long as it is un-instrumented, add it to the list - if( !((BLInstrumentationEdge*)(*next))->hasInstrumentation() ) - insertInstrumentationStartingAt((BLInstrumentationEdge*)*next,dag); - else - DEBUG(dbgs() << " Edge " << *(BLInstrumentationEdge*)(*next) - << " already instrumented.\n"); - } -} - -// Inserts instrumentation according to the marked edges in dag. Phony edges -// must be unlinked from the DAG, but accessible from the backedges. 
Dag -// must have initializations, path number increments, and counter increments -// present. -// -// Counter storage is created here. -void PathProfiler::insertInstrumentation( - BLInstrumentationDag& dag, Module &M) { - - BLInstrumentationEdge* exitRootEdge = - (BLInstrumentationEdge*) dag.getExitRootEdge(); - insertInstrumentationStartingAt(exitRootEdge, &dag); - - // Iterate through each call edge and apply the appropriate hash increment - // and decrement functions - BLEdgeVector callEdges = dag.getCallPhonyEdges(); - for( BLEdgeIterator edge = callEdges.begin(), - end = callEdges.end(); edge != end; edge++ ) { - BLInstrumentationNode* node = - (BLInstrumentationNode*)(*edge)->getSource(); - BasicBlock::iterator insertPoint = node->getBlock()->getFirstInsertionPt(); - - // Find the first function call - while( ((Instruction&)(*insertPoint)).getOpcode() != Instruction::Call ) - insertPoint++; - - DEBUG(dbgs() << "\nInstrumenting method call block '" - << node->getBlock()->getName() << "'\n"); - DEBUG(dbgs() << " Path number initialized: " - << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n"); - - Value* newpn; - if( node->getStartingPathNumber() ) { - long inc = ((BLInstrumentationEdge*)(*edge))->getIncrement(); - if ( inc ) - newpn = BinaryOperator::Create(Instruction::Add, - node->getStartingPathNumber(), - createIncrementConstant(inc,32), - "pathNumber", insertPoint); - else - newpn = node->getStartingPathNumber(); - } else { - newpn = (Value*)createIncrementConstant( - ((BLInstrumentationEdge*)(*edge))->getIncrement(), 32); - } - - insertCounterIncrement(newpn, insertPoint, &dag); - insertCounterIncrement(newpn, node->getBlock()->getTerminator(), - &dag, false); - } -} - -// Entry point of the module -void PathProfiler::runOnFunction(std::vector &ftInit, - Function &F, Module &M) { - // Build DAG from CFG - BLInstrumentationDag dag = BLInstrumentationDag(F); - dag.init(); - - // give each path a unique integer value - dag.calculatePathNumbers(); - - // modify path increments to increase the efficiency - // of instrumentation - dag.calculateSpanningTree(); - dag.calculateChordIncrements(); - dag.pushInitialization(); - dag.pushCounters(); - dag.unlinkPhony(); - - // potentially generate .dot graph for the dag - if (DotPathDag) - dag.generateDotGraph (); - - // Should we store the information in an array or hash - if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) { - Type* t = ArrayType::get(Type::getInt32Ty(*Context), - dag.getNumberOfPaths()); - - dag.setCounterArray(new GlobalVariable(M, t, false, - GlobalValue::InternalLinkage, - Constant::getNullValue(t), "")); - } - - insertInstrumentation(dag, M); - - // Add to global function reference table - unsigned type; - Type* voidPtr = TypeBuilder*, true>::get(*Context); - - if( dag.getNumberOfPaths() <= HASH_THRESHHOLD ) - type = ProfilingArray; - else - type = ProfilingHash; - - std::vector entryArray(3); - entryArray[0] = createIncrementConstant(type,32); - entryArray[1] = createIncrementConstant(dag.getNumberOfPaths(),32); - entryArray[2] = dag.getCounterArray() ? 
- ConstantExpr::getBitCast(dag.getCounterArray(), voidPtr) : - Constant::getNullValue(voidPtr); - - StructType* at = ftEntryTypeBuilder::get(*Context); - ConstantStruct* functionEntry = - (ConstantStruct*)ConstantStruct::get(at, entryArray); - ftInit.push_back(functionEntry); -} - -// Output the bitcode if we want to observe instrumentation changess -#define PRINT_MODULE dbgs() << \ - "\n\n============= MODULE BEGIN ===============\n" << M << \ - "\n============== MODULE END ================\n" - -bool PathProfiler::runOnModule(Module &M) { - Context = &M.getContext(); - - DEBUG(dbgs() - << "****************************************\n" - << "****************************************\n" - << "** **\n" - << "** PATH PROFILING INSTRUMENTATION **\n" - << "** **\n" - << "****************************************\n" - << "****************************************\n"); - - // No main, no instrumentation! - Function *Main = M.getFunction("main"); - - // Using fortran? ... this kind of works - if (!Main) - Main = M.getFunction("MAIN__"); - - if (!Main) { - errs() << "WARNING: cannot insert path profiling into a module" - << " with no main function!\n"; - return false; - } - - llvmIncrementHashFunction = M.getOrInsertFunction( - "llvm_increment_path_count", - Type::getVoidTy(*Context), // return type - Type::getInt32Ty(*Context), // function number - Type::getInt32Ty(*Context), // path number - NULL ); - - llvmDecrementHashFunction = M.getOrInsertFunction( - "llvm_decrement_path_count", - Type::getVoidTy(*Context), // return type - Type::getInt32Ty(*Context), // function number - Type::getInt32Ty(*Context), // path number - NULL ); - - std::vector ftInit; - unsigned functionNumber = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) { - if (F->isDeclaration()) - continue; - - DEBUG(dbgs() << "Function: " << F->getName() << "\n"); - functionNumber++; - - // set function number - currentFunctionNumber = functionNumber; - runOnFunction(ftInit, *F, M); - } - - Type *t = ftEntryTypeBuilder::get(*Context); - ArrayType* ftArrayType = ArrayType::get(t, ftInit.size()); - Constant* ftInitConstant = ConstantArray::get(ftArrayType, ftInit); - - DEBUG(dbgs() << " ftArrayType:" << *ftArrayType << "\n"); - - GlobalVariable* functionTable = - new GlobalVariable(M, ftArrayType, false, GlobalValue::InternalLinkage, - ftInitConstant, "functionPathTable"); - Type *eltType = ftArrayType->getTypeAtIndex((unsigned)0); - InsertProfilingInitCall(Main, "llvm_start_path_profiling", functionTable, - PointerType::getUnqual(eltType)); - - DEBUG(PRINT_MODULE); - - return true; -} - -// If this edge is a critical edge, then inserts a node at this edge. -// This edge becomes the first edge, and a new BallLarusEdge is created. 
-// Returns true if the edge was split -bool PathProfiler::splitCritical(BLInstrumentationEdge* edge, - BLInstrumentationDag* dag) { - unsigned succNum = edge->getSuccessorNumber(); - BallLarusNode* sourceNode = edge->getSource(); - BallLarusNode* targetNode = edge->getTarget(); - BasicBlock* sourceBlock = sourceNode->getBlock(); - BasicBlock* targetBlock = targetNode->getBlock(); - - if(sourceBlock == NULL || targetBlock == NULL - || sourceNode->getNumberSuccEdges() <= 1 - || targetNode->getNumberPredEdges() == 1 ) { - return(false); - } - - TerminatorInst* terminator = sourceBlock->getTerminator(); - - if( SplitCriticalEdge(terminator, succNum, this, false)) { - BasicBlock* newBlock = terminator->getSuccessor(succNum); - dag->splitUpdate(edge, newBlock); - return(true); - } else - return(false); -} diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9b56a76..007e9b7 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/DominatorInternals.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -80,7 +79,6 @@ namespace { const TargetLowering *TLI; const TargetLibraryInfo *TLInfo; DominatorTree *DT; - ProfileInfo *PFI; /// CurInstIterator - As we scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should @@ -111,7 +109,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); - AU.addPreserved(); AU.addRequired(); } @@ -151,7 +148,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (TM) TLI = TM->getTargetLowering(); TLInfo = &getAnalysis(); DT = getAnalysisIfAvailable(); - PFI = getAnalysisIfAvailable(); OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); @@ -442,10 +438,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DT->changeImmediateDominator(DestBB, NewIDom); DT->eraseNode(BB); } - if (PFI) { - PFI->replaceAllUses(BB, DestBB); - PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); - } BB->eraseFromParent(); ++NumBlocksElim; diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8f3ff96..0e7f7f7 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -45,7 +44,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -213,10 +211,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, DominatorTree *DT = P->getAnalysisIfAvailable(); LoopInfo *LI = P->getAnalysisIfAvailable(); - ProfileInfo *PI = P->getAnalysisIfAvailable(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0 && PI == 0) + if (DT == 0 && LI == 0) return NewBB; // Now update analysis information. 
Since the only predecessor of NewBB is @@ -369,9 +366,5 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } } - // Update ProfileInfo if it is around. - if (PI) - PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); - return NewBB; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 56a2d92..82b8da3 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/DIBuilder.h" #include "llvm/DebugInfo.h" @@ -513,11 +512,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { DT->changeImmediateDominator(DestBB, PredBBIDom); DT->eraseNode(PredBB); } - ProfileInfo *PI = P->getAnalysisIfAvailable(); - if (PI) { - PI->replaceAllUses(PredBB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB)); - } } // Nuke BB. PredBB->eraseFromParent(); -- cgit v1.1 From af7ae9d6890ce5fae27e38ccebb5da09288c49e0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 2 Oct 2013 17:04:59 +0000 Subject: StructurizeCFG: Add dependency on LowerSwitch pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch instructions were crashing the StructurizeCFG pass, and it's probably easier anyway if we don't need to handle them in this pass. Reviewed-by: Christian König git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/StructurizeCFG.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index bb6f163..72fea80 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -231,7 +231,7 @@ public: StructurizeCFG() : RegionPass(ID) { - initializeRegionInfoPass(*PassRegistry::getPassRegistry()); + initializeStructurizeCFGPass(*PassRegistry::getPassRegistry()); } using Pass::doInitialization; @@ -244,6 +244,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addPreserved(); RegionPass::getAnalysisUsage(AU); @@ -256,6 +257,7 @@ char StructurizeCFG::ID = 0; INITIALIZE_PASS_BEGIN(StructurizeCFG, "structurizecfg", "Structurize the CFG", false, false) +INITIALIZE_PASS_DEPENDENCY(LowerSwitch) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(RegionInfo) INITIALIZE_PASS_END(StructurizeCFG, "structurizecfg", "Structurize the CFG", -- cgit v1.1 From 6623d050c6f4351293bc1849e49bc0e37ec04596 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 2 Oct 2013 19:06:06 +0000 Subject: SLPVectorizer: Make store chain finding more aggressive with GetUnderlyingObject. This recursively strips all GEPs like the existing code. It also handles bitcasts and other operations that do not change the pointer value. 
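For illustration, a hypothetical chain the stronger stripping can now see
through (the value names here are invented for this sketch):

  %base = alloca [4 x i32]
  %bc   = bitcast [4 x i32]* %base to i32*
  %gep  = getelementptr i32* %bc, i64 1
  store i32 %v, i32* %gep

Stripping only GEPs would stop at %bc, so this store would be filed under a
different base pointer than the other stores on %base; GetUnderlyingObject
also walks through the bitcast and recovers %base itself, letting the stores
land in one chain.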
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191847 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index c2c53c7..4bee2cb 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -25,8 +25,8 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/DataLayout.h" @@ -318,10 +318,7 @@ private: /// \returns true if the scalars in VL are equal to this entry. bool isSame(ArrayRef VL) const { assert(VL.size() == Scalars.size() && "Invalid size"); - for (int i = 0, e = VL.size(); i != e; ++i) - if (VL[i] != Scalars[i]) - return false; - return true; + return std::equal(VL.begin(), VL.end(), Scalars.begin()); } /// A vector of scalars. @@ -1783,10 +1780,8 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { if (Ty->isAggregateType() || Ty->isVectorTy()) return 0; - // Find the base of the GEP. - Value *Ptr = SI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast(Ptr)) - Ptr = GEP->getPointerOperand(); + // Find the base pointer. + Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL); // Save the store locations. StoreRefs[Ptr].push_back(SI); -- cgit v1.1 From 5c86f1296947df003e22b3a08e241dc26b408e4b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Oct 2013 20:04:26 +0000 Subject: Fix comment grammar and capitalization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191850 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index d5df1115..c1cc00b 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3710,8 +3710,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() { Stores.push_back(St); DepChecker.addAccess(St); } - } // next instr. - } // next block. + } // Next instr. + } // Next block. // Now we have two lists that hold the loads and the stores. // Next, we find the pointers that they use. @@ -3805,7 +3805,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (NumComparisons == 0 && NeedRTCheck) NeedRTCheck = false; - // Check that we did not collect too many pointers or found a unsizeable + // Check that we did not collect too many pointers or found an unsizeable // pointer. if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) { PtrRtCheck.reset(); -- cgit v1.1 From 7b7294c534f97f97860090401672a9c9831033db Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Oct 2013 20:04:29 +0000 Subject: Fix debug printing spacing. Fix missing newlines, missing and extra spaces in printed messages. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191851 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 65 +++++++++++++++--------------- 1 file changed, 33 insertions(+), 32 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c1cc00b..294b70a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -867,14 +867,14 @@ private: if (isPowerOf2_32(Val) && Val <= MaxVectorWidth) Width = Val; else - DEBUG(dbgs() << "LV: ignoring invalid width hint metadata"); + DEBUG(dbgs() << "LV: ignoring invalid width hint metadata\n"); } else if (Hint == "unroll") { if (isPowerOf2_32(Val) && Val <= MaxUnrollFactor) Unroll = Val; else - DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata"); + DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); } else { - DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint); + DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); } } }; @@ -915,7 +915,7 @@ struct LoopVectorize : public LoopPass { return false; if (DL == NULL) { - DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout"); + DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n"); return false; } @@ -966,8 +966,8 @@ struct LoopVectorize : public LoopPass { } DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< - F->getParent()->getModuleIdentifier()<<"\n"); - DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); + F->getParent()->getModuleIdentifier() << '\n'); + DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); if (VF.Width == 1) { if (UF == 1) @@ -1400,7 +1400,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, Starts.push_back(Ptr); Ends.push_back(Ptr); } else { - DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n"); + DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n'); Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc); Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc); @@ -2745,7 +2745,7 @@ bool LoopVectorizationLegality::canVectorize() { // We need to have a loop header. BasicBlock *Latch = TheLoop->getLoopLatch(); DEBUG(dbgs() << "LV: Found a loop: " << - TheLoop->getHeader()->getName() << "\n"); + TheLoop->getHeader()->getName() << '\n'); // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); @@ -2815,7 +2815,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, Instruction *U = cast(*I); // This user may be a reduction exit value. if (!TheLoop->contains(U)) { - DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n"); + DEBUG(dbgs() << "LV: Found an outside user for : " << *U << '\n'); return true; } } @@ -2953,7 +2953,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Check that the instruction return type is vectorizable. if (!VectorType::isValidElementType(it->getType()) && !it->getType()->isVoidTy()) { - DEBUG(dbgs() << "LV: Found unvectorizable type." 
<< "\n"); + DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; } @@ -3158,7 +3158,7 @@ bool AccessAnalysis::canCheckPtrAtRT( RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId); - DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr <<"\n"); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *Ptr << '\n'); } else { CanDoRT = false; } @@ -3223,7 +3223,7 @@ void AccessAnalysis::processMemAccesses(bool UseDeferred) { !isa(UnderlyingObj)) && !isIdentifiedObject(UnderlyingObj))) { DEBUG(dbgs() << "LV: Found an unidentified " << - (IsWrite ? "write" : "read" ) << " ptr:" << *UnderlyingObj << + (IsWrite ? "write" : "read" ) << " ptr: " << *UnderlyingObj << "\n"); IsRTCheckNeeded = (IsRTCheckNeeded || !isIdentifiedObject(UnderlyingObj) || @@ -3567,7 +3567,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (Val == 0) { if (ATy == BTy) return false; - DEBUG(dbgs() << "LV: Zero dependence difference but different types"); + DEBUG(dbgs() << "LV: Zero dependence difference but different types\n"); return true; } @@ -3576,7 +3576,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Positive distance bigger than max vectorization factor. if (ATy != BTy) { DEBUG(dbgs() << - "LV: ReadWrite-Write positive dependency with different types"); + "LV: ReadWrite-Write positive dependency with different types\n"); return false; } @@ -3593,7 +3593,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, 2*TypeByteSize > MaxSafeDepDistBytes || Distance < TypeByteSize * ForcedUnroll * ForcedFactor) { DEBUG(dbgs() << "LV: Failure because of Positive distance " - << Val.getSExtValue() << "\n"); + << Val.getSExtValue() << '\n'); return true; } @@ -3606,7 +3606,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, return true; DEBUG(dbgs() << "LV: Positive distance " << Val.getSExtValue() << - " with max VF=" << MaxSafeDepDistBytes/TypeByteSize << "\n"); + " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n'); return false; } @@ -3833,7 +3833,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes(); } - DEBUG(dbgs() << "LV: We "<< (NeedRTCheck ? "" : "don't") << + DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") << " need a runtime memory check.\n"); return CanVecMem; @@ -4209,7 +4209,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // Find the trip count. unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch()); - DEBUG(dbgs() << "LV: Found trip count:"< Date: Wed, 2 Oct 2013 20:20:39 +0000 Subject: Apply slp vectorization on fully-vectorizable tree of height 2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4bee2cb..7d7e877 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -311,6 +311,10 @@ private: /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); + /// \returns whether the VectorizableTree is fully vectoriable and will + /// be beneficial even the tree height is tiny. 
+  bool isFullyVectorizableTinyTree();
+
   struct TreeEntry {
     TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
     NeedToGather(0) {}
@@ -917,15 +921,28 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
   }
 }

+bool BoUpSLP::isFullyVectorizableTinyTree() {
+  DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
+        VectorizableTree.size() << " is fully vectorizable.\n");
+
+  // We only handle trees of height 2.
+  if (VectorizableTree.size() != 2)
+    return false;
+
+  // Gathering cost would be too much for tiny trees.
+  if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather)
+    return false;
+
+  return true;
+}
+
 int BoUpSLP::getTreeCost() {
   int Cost = 0;
   DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
        VectorizableTree.size() << ".\n");

-  // Don't vectorize tiny trees. Small load/store chains or consecutive stores
-  // of constants will be vectoried in SelectionDAG in MergeConsecutiveStores.
-  // The SelectionDAG vectorizer can only handle pairs (trees of height = 2).
-  if (VectorizableTree.size() < 3) {
+  // We only vectorize tiny trees if they are fully vectorizable.
+  if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
     if (!VectorizableTree.size()) {
       assert(!ExternalUses.size() && "We should not have any external users");
     }
-- cgit v1.1

From 407847f130885dd9e26e908f033f697c0975aeae Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 2 Oct 2013 22:38:17 +0000
Subject: Don't use runtime bounds check between address spaces.

Don't vectorize with a runtime check if it requires a
comparison between pointers with different address spaces.
The values can't be assumed to be directly comparable.
Previously it would create an illegal bitcast.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191862 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 60 ++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 11 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 294b70a..a71df08 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1385,11 +1385,9 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
   SmallVector , 2> Starts;
   SmallVector , 2> Ends;

+  LLVMContext &Ctx = Loc->getContext();
   SCEVExpander Exp(*SE, "induction");

-  // Use this type for pointer arithmetic.
-  Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
-
   for (unsigned i = 0; i < NumPointers; ++i) {
     Value *Ptr = PtrRtCheck->Pointers[i];
     const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1401,6 +1399,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
       Ends.push_back(Ptr);
     } else {
       DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
+      unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+      // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc); Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc); @@ -1422,10 +1424,20 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j]) continue; - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc"); + unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); + unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); + + assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && + (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); + + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); @@ -1440,9 +1452,8 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, // We have to do this trickery because the IRBuilder might fold the check to a // constant expression in which case there is no Instruction anchored in a // the block. - LLVMContext &Ctx = Loc->getContext(); - Instruction * Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck, - ConstantInt::getTrue(Ctx)); + Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck, + ConstantInt::getTrue(Ctx)); ChkBuilder.Insert(Check, "memcheck.conflict"); return Check; } @@ -3166,9 +3177,36 @@ bool AccessAnalysis::canCheckPtrAtRT( if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2) NumComparisons = 0; // Only one dependence set. - else + else { NumComparisons = (NumWritePtrChecks * (NumReadPtrChecks + NumWritePtrChecks - 1)); + } + + // If the pointers that we would use for the bounds comparison have different + // address spaces, assume the values aren't directly comparable, so we can't + // use them for the runtime check. We also have to assume they could + // overlap. In the future there should be metadata for whether address spaces + // are disjoint. + unsigned NumPointers = RtCheck.Pointers.size(); + for (unsigned i = 0; i < NumPointers; ++i) { + for (unsigned j = i + 1; j < NumPointers; ++j) { + // Only need to check pointers between two different dependency sets. 
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + continue; + + Value *PtrI = RtCheck.Pointers[i]; + Value *PtrJ = RtCheck.Pointers[j]; + + unsigned ASi = PtrI->getType()->getPointerAddressSpace(); + unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); + if (ASi != ASj) { + DEBUG(dbgs() << "LV: Runtime check would require comparison between" + " different address spaces\n"); + return false; + } + } + } + return CanDoRT; } -- cgit v1.1 From 1df59ef1aa271a4e33cf8973e14bcaf55c585231 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 3 Oct 2013 18:15:57 +0000 Subject: Make gep i8* X, -(ptrtoint Y) transform work with address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index fcb26ab..27f1a3e 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1186,14 +1186,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // The GEP pattern is emitted by the SCEV expander for certain kinds of // pointer arithmetic. if (TD && GEP.getNumIndices() == 1 && - match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value()))) && - GEP.getType() == Builder->getInt8PtrTy() && - GEP.getOperand(1)->getType()->getScalarSizeInBits() == - TD->getPointerSizeInBits(GEP.getPointerAddressSpace())) { - Operator *Index = cast(GEP.getOperand(1)); - Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); - Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); - return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + match(GEP.getOperand(1), m_Neg(m_PtrToInt(m_Value())))) { + unsigned AS = GEP.getPointerAddressSpace(); + if (GEP.getType() == Builder->getInt8PtrTy(AS) && + GEP.getOperand(1)->getType()->getScalarSizeInBits() == + TD->getPointerSizeInBits(AS)) { + Operator *Index = cast(GEP.getOperand(1)); + Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); + } } // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). -- cgit v1.1 From 438900938c3ac9d7fac2dd5d2c85ca4b9b2e35f7 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 3 Oct 2013 18:29:09 +0000 Subject: Optimize linkonce_odr unnamed_addr functions during LTO. Generalize the API so we can distinguish symbols that are needed just for a DSO symbol table from those that are used from some native .o. The symbols that are only wanted for the dso symbol table can be dropped if llvm can prove every other dso has a copy (linkonce_odr) and the address is not important (unnamed_addr). 
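A sketch of the kind of symbol this enables dropping (hypothetical IR):

  define linkonce_odr void @helper() unnamed_addr {
    ret void
  }

If @helper is wanted only for the DSO symbol table and is not referenced
from any native .o, linkonce_odr guarantees that every other DSO needing it
carries an equivalent definition, and unnamed_addr says its address is not
significant, so it can be internalized and dropped.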
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191922 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/IPO.cpp                | 2 +-
 lib/Transforms/IPO/Internalize.cpp        | 46 +++++++++++++++++++++++++------
 lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +-
 3 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 5d563d8..5f26bac 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -98,7 +98,7 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
   std::vector Export;
   if (AllButMain)
     Export.push_back("main");
-  unwrap(PM)->add(createInternalizePass(Export));
+  unwrap(PM)->add(createInternalizePass(Export, None));
 }

 void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index f2feacc..f20a7bd 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -44,13 +44,20 @@ APIList("internalize-public-api-list", cl::value_desc("list"),
         cl::desc("A list of symbol names to preserve"),
         cl::CommaSeparated);

+static cl::list
+DSOList("internalize-dso-list", cl::value_desc("list"),
+        cl::desc("A list of symbol names needed for a dso symbol table"),
+        cl::CommaSeparated);
+
 namespace {
   class InternalizePass : public ModulePass {
     std::set ExternalNames;
+    std::set DSONames;
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit InternalizePass();
-    explicit InternalizePass(ArrayRef ExportList);
+    explicit InternalizePass(ArrayRef ExportList,
+                             ArrayRef DSOList);
     void LoadFile(const char *Filename);
     virtual bool runOnModule(Module &M);

@@ -71,15 +78,21 @@ InternalizePass::InternalizePass()
   if (!APIFile.empty())           // If a filename is specified, use it.
     LoadFile(APIFile.c_str());
   ExternalNames.insert(APIList.begin(), APIList.end());
+  DSONames.insert(DSOList.begin(), DSOList.end());
 }

-InternalizePass::InternalizePass(ArrayRef ExportList)
+InternalizePass::InternalizePass(ArrayRef ExportList,
+                                 ArrayRef DSOList)
   : ModulePass(ID){
   initializeInternalizePassPass(*PassRegistry::getPassRegistry());
   for(ArrayRef::const_iterator itr = ExportList.begin();
         itr != ExportList.end(); itr++) {
     ExternalNames.insert(*itr);
   }
+  for(ArrayRef::const_iterator itr = DSOList.begin();
+        itr != DSOList.end(); itr++) {
+    DSONames.insert(*itr);
+  }
 }

 void InternalizePass::LoadFile(const char *Filename) {
@@ -99,7 +112,8 @@ void InternalizePass::LoadFile(const char *Filename) {
 }

 static bool shouldInternalize(const GlobalValue &GV,
-                              const std::set &ExternalNames) {
+                              const std::set &ExternalNames,
+                              const std::set &DSONames) {
   // Function must be defined here
   if (GV.isDeclaration())
     return false;
@@ -116,7 +130,20 @@ static bool shouldInternalize(const GlobalValue &GV,
   if (ExternalNames.count(GV.getName()))
     return false;

-  return true;
+  // Not needed for the symbol table?
+  if (!DSONames.count(GV.getName()))
+    return true;
+
+  // Not a linkonce. Someone can depend on it being on the symbol table.
+  if (!GV.hasLinkOnceLinkage())
+    return false;
+
+  // The address is not important, we can hide it.
+  if (GV.hasUnnamedAddr())
+    return true;
+
+  // FIXME: Check if the address is used.
+  return false;
 }

 bool InternalizePass::runOnModule(Module &M) {
@@ -145,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) {
   // Mark all functions not in the api as internal.
   // FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    if (!shouldInternalize(*I, ExternalNames))
+    if (!shouldInternalize(*I, ExternalNames, DSONames))
       continue;

     I->setLinkage(GlobalValue::InternalLinkage);
@@ -182,7 +209,7 @@ bool InternalizePass::runOnModule(Module &M) {
   // FIXME: maybe use private linkage?
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I) {
-    if (!shouldInternalize(*I, ExternalNames))
+    if (!shouldInternalize(*I, ExternalNames, DSONames))
       continue;

     I->setLinkage(GlobalValue::InternalLinkage);
@@ -194,7 +221,7 @@ bool InternalizePass::runOnModule(Module &M) {
   // Mark all aliases that are not in the api as internal as well.
   for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
        I != E; ++I) {
-    if (!shouldInternalize(*I, ExternalNames))
+    if (!shouldInternalize(*I, ExternalNames, DSONames))
       continue;

     I->setLinkage(GlobalValue::InternalLinkage);
@@ -210,6 +237,7 @@ ModulePass *llvm::createInternalizePass() {
   return new InternalizePass();
 }

-ModulePass *llvm::createInternalizePass(ArrayRef ExportList) {
-  return new InternalizePass(ExportList);
+ModulePass *llvm::createInternalizePass(ArrayRef ExportList,
+                                        ArrayRef DSOList) {
+  return new InternalizePass(ExportList, DSOList);
 }
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 2008c5d..b9660fa 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -277,7 +277,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
   // for a main function. If main is defined, mark all other functions
   // internal.
   if (Internalize)
-    PM.add(createInternalizePass("main"));
+    PM.add(createInternalizePass("main", None));

   // Propagate constants at call sites into the functions they call. This
   // opens opportunities for globalopt (and inlining) by substituting function
-- cgit v1.1

From 03e84c9df91cbc1fe0219a51109e260cacdfd2b7 Mon Sep 17 00:00:00 2001
From: Owen Anderson
Date: Thu, 3 Oct 2013 21:08:05 +0000
Subject: Pull fptruncs upwards through selects when one of the select's operands is a constant.

This has a number of benefits, including producing small immediates
(easier to materialize, smaller constant pools) as well as being more
likely to allow the fptrunc to fuse with a preceding instruction
(truncating selects are unusual).
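A hypothetical before/after sketch (the truncated constant folds into a
float immediate):

  ; before
  %sel = select i1 %cond, double %x, double 1.000000e+00
  %res = fptrunc double %sel to float

  ; after
  %x.tr = fptrunc double %x to float
  %res  = select i1 %cond, float %x.tr, float 1.000000e+00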
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191929 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCasts.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a35631f..01894cb 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1229,6 +1229,19 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } + // (fptrunc (select cond, R1, Cst)) --> + // (select cond, (fptrunc R1), (fptrunc Cst)) + SelectInst *SI = dyn_cast(CI.getOperand(0)); + if (SI && + (isa(SI->getOperand(1)) || + isa(SI->getOperand(2)))) { + Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), + CI.getType()); + Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), + CI.getType()); + return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); + } + IntrinsicInst *II = dyn_cast(CI.getOperand(0)); if (II) { switch (II->getIntrinsicID()) { -- cgit v1.1 From af57bdf7d673a3731fb887218e7a9ccd1576ab4f Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 4 Oct 2013 20:39:16 +0000 Subject: SLPVectorizer: Sort inputs to commutative binary operations Sort the operands of the other entries in the current vectorization root according to the first entry's operands opcodes. %conv0 = uitofp ... %load0 = load float ... = fmul %conv0, %load0 = fmul %load0, %conv1 = fmul %load0, %conv2 Make sure that we recursively vectorize <%conv0, %conv1, %conv2> and <%load0, %load0, %load0>. This makes it more likely to obtain vectorizable trees. We have to be careful when we sort that we don't destroy 'good' existing ordering implied by source order. radar://15080067 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191977 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 127 ++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7d7e877..b5a303e 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -206,6 +206,112 @@ static bool CanReuseExtract(ArrayRef VL) { return true; } +static bool all_equal(SmallVectorImpl &V) { + Value *First = V[0]; + for (int i = 1, e = V.size(); i != e; ++i) + if (V[i] != First) + return false; + return true; +} + +static void reorderInputsAccordingToOpcode(ArrayRef VL, + SmallVectorImpl &Left, + SmallVectorImpl &Right) { + + SmallVector OrigLeft, OrigRight; + + bool AllSameOpcodeLeft = true; + bool AllSameOpcodeRight = true; + for (unsigned i = 0, e = VL.size(); i != e; ++i) { + Instruction *I = cast(VL[i]); + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + + OrigLeft.push_back(V0); + OrigRight.push_back(V1); + + Instruction *I0 = dyn_cast(V0); + Instruction *I1 = dyn_cast(V1); + + // Check whether all operands on one side have the same opcode. In this case + // we want to preserve the original order and not make things worse by + // reordering. 
+    AllSameOpcodeLeft = I0;
+    AllSameOpcodeRight = I1;
+
+    if (i && AllSameOpcodeLeft) {
+      if(Instruction *P0 = dyn_cast(OrigLeft[i-1])) {
+        if(P0->getOpcode() != I0->getOpcode())
+          AllSameOpcodeLeft = false;
+      } else
+        AllSameOpcodeLeft = false;
+    }
+    if (i && AllSameOpcodeRight) {
+      if(Instruction *P1 = dyn_cast(OrigRight[i-1])) {
+        if(P1->getOpcode() != I1->getOpcode())
+          AllSameOpcodeRight = false;
+      } else
+        AllSameOpcodeRight = false;
+    }
+
+    // Sort two opcodes. In the code below we try to preserve the ability to use
+    // broadcast of values instead of individual inserts.
+    // vl1 = load
+    // vl2 = phi
+    // vr1 = load
+    // vr2 = vr1
+    //  = vl1 x vr1
+    //  = vl2 x vr2
+    // If we just sorted according to opcode we would leave the first line
+    // intact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
+    //  = vl1 x vr1
+    //  = vr2 x vl2
+    // Because vr2 and vr1 are from the same load we lose the opportunity of a
+    // broadcast for the packed right side in the backend: we have [vr1, vl2]
+    // instead of [vr1, vr2=vr1].
+    if (I0 && I1) {
+       if(!i && I0->getOpcode() > I1->getOpcode()) {
+         Left.push_back(I1);
+         Right.push_back(I0);
+       } else if (i && I0->getOpcode() > I1->getOpcode() && Right[i-1] != I1) {
+         // Try not to destroy a broadcast for no apparent benefit.
+         Left.push_back(I1);
+         Right.push_back(I0);
+       } else if (i && I0->getOpcode() == I1->getOpcode() && Right[i-1] == I0) {
+         // Try to preserve broadcasts.
+         Left.push_back(I1);
+         Right.push_back(I0);
+       } else if (i && I0->getOpcode() == I1->getOpcode() && Left[i-1] == I1) {
+         // Try to preserve broadcasts.
+         Left.push_back(I1);
+         Right.push_back(I0);
+       } else {
+         Left.push_back(I0);
+         Right.push_back(I1);
+       }
+       continue;
+    }
+    // One opcode, put the instruction on the right.
+    if (I0) {
+      Left.push_back(V1);
+      Right.push_back(I0);
+      continue;
+    }
+    Left.push_back(V0);
+    Right.push_back(V1);
+  }
+
+  bool LeftBroadcast = all_equal(Left);
+  bool RightBroadcast = all_equal(Right);
+
+  // Don't reorder if the operands were good to begin with.
+  if (!(LeftBroadcast || RightBroadcast) &&
+      (AllSameOpcodeRight || AllSameOpcodeLeft)) {
+    Left = OrigLeft;
+    Right = OrigRight;
+  }
+}
+
 /// Bottom Up SLP Vectorizer.
 class BoUpSLP {
 public:
@@ -775,6 +881,16 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) {
       newTreeEntry(VL, true);
       DEBUG(dbgs() << "SLP: added a vector of bin op.\n");

+      // Sort operands of the instructions so that each side is more likely to
+      // have the same opcode.
+      if (isa(VL0) && VL0->isCommutative()) {
+        ValueList Left, Right;
+        reorderInputsAccordingToOpcode(VL, Left, Right);
+        buildTree_rec(Left, Depth + 1);
+        buildTree_rec(Right, Depth + 1);
+        return;
+      }
+
       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
         ValueList Operands;
         // Prepare the operand vector.
@@ -1331,10 +1447,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::Or: case Instruction::Xor: { ValueList LHSVL, RHSVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - LHSVL.push_back(cast(E->Scalars[i])->getOperand(0)); - RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); - } + if (isa(VL0) && VL0->isCommutative()) + reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); + else + for (int i = 0, e = E->Scalars.size(); i < e; ++i) { + LHSVL.push_back(cast(E->Scalars[i])->getOperand(0)); + RHSVL.push_back(cast(E->Scalars[i])->getOperand(1)); + } setInsertPointAfterBundle(E->Scalars); -- cgit v1.1 From fc3b7bbf49086e827a628e22db0b75e46c151d17 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 4 Oct 2013 23:41:05 +0000 Subject: UpdatePHINodes in BasicBlockUtils should not crash on duplicate predecessors UpdatePHINodes has an optimization to reuse an existing PHI node, where it first deletes all of its entries and then replaces them. Unfortunately, in the case where we had duplicate predecessors (which are allowed so long as the associated PHI entries have the same value), the loop removing the existing PHI entries from the to-be-reused PHI would assert (if that PHI was not the one which had the duplicates). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192001 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BasicBlockUtils.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index a2e82f3..12de9ee 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -400,8 +400,12 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) - PN->removeIncomingValue(Preds[i], false); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { + // Explicitly check the BB index here to handle duplicates in Preds. + int Idx = PN->getBasicBlockIndex(Preds[i]); + if (Idx >= 0) + PN->removeIncomingValue(Idx, false); + } } else { // If the values coming into the block are not the same, we need a PHI. 
// Create the new PHI node, insert it into NewBB at the end of the block
-- cgit v1.1

From 23eb90714bb6a5a7d94a262f439b5bf872733cf1 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov
Date: Mon, 7 Oct 2013 19:03:24 +0000
Subject: Revert r191834 until we measure the effect of this on benchmarks and maybe find a better way to fix it

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192121 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/GlobalOpt.cpp | 59 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3129559..cf15580 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -50,6 +50,7 @@ STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
 STATISTIC(NumDeleted    , "Number of globals deleted");
 STATISTIC(NumFnDeleted  , "Number of functions deleted");
 STATISTIC(NumGlobUses   , "Number of global uses devirtualized");
+STATISTIC(NumLocalized  , "Number of globals localized");
 STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
 STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
@@ -136,12 +137,24 @@ struct GlobalStatus {
   /// ever stored to this global, keep track of what value it is.
   Value *StoredOnceValue;

+  /// AccessingFunction/HasMultipleAccessingFunctions - These start out
+  /// null/false. When the first accessing function is noticed, it is recorded.
+  /// When a second different accessing function is noticed,
+  /// HasMultipleAccessingFunctions is set to true.
+  const Function *AccessingFunction;
+  bool HasMultipleAccessingFunctions;
+
+  /// HasNonInstructionUser - Set to true if this global has a user that is not
+  /// an instruction (e.g. a constant expr or GV initializer).
+  bool HasNonInstructionUser;
+
   /// AtomicOrdering - Set to the strongest atomic ordering requirement.
   AtomicOrdering Ordering;

-  GlobalStatus()
-      : isCompared(false), isLoaded(false), StoredType(NotStored),
-        StoredOnceValue(0), Ordering(NotAtomic) {}
+  GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
+                   StoredOnceValue(0), AccessingFunction(0),
+                   HasMultipleAccessingFunctions(false),
+                   HasNonInstructionUser(false), Ordering(NotAtomic) {}
 };

 }
@@ -182,12 +195,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
        ++UI) {
     const User *U = *UI;
     if (const ConstantExpr *CE = dyn_cast(U)) {
+      GS.HasNonInstructionUser = true;
+
       // If the result of the constantexpr isn't pointer type, then we won't
       // know to expect it in various places. Just reject early.
       if (!isa(CE->getType())) return true;
       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
     } else if (const Instruction *I = dyn_cast(U)) {
+      if (!GS.HasMultipleAccessingFunctions) {
+        const Function *F = I->getParent()->getParent();
+        if (GS.AccessingFunction == 0)
+          GS.AccessingFunction = F;
+        else if (GS.AccessingFunction != F)
+          GS.HasMultipleAccessingFunctions = true;
+      }
       if (const LoadInst *LI = dyn_cast(I)) {
         GS.isLoaded = true;
         // Don't hack on volatile loads.
@@ -264,10 +286,12 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
           return true;  // Any other non-load instruction might take address!
       }
     } else if (const Constant *C = dyn_cast(U)) {
+      GS.HasNonInstructionUser = true;
       // We might have a dead and dangling constant hanging off of here.
         if (!SafeToDestroyConstant(C))
           return true;
       } else {
+        GS.HasNonInstructionUser = true;
         // Otherwise must be some other user.
         return true;
       }
@@ -1914,6 +1938,35 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
 bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                       Module::global_iterator &GVI,
                                       const GlobalStatus &GS) {
+  // If this is a first class global and has only one accessing function
+  // and this function is main (which we know is not recursive), we replace
+  // the global with a local alloca in this function.
+  //
+  // NOTE: It doesn't make sense to promote non-single-value types since we
+  // are just replacing static memory with stack memory.
+  //
+  // If the global is in a different address space, don't bring it to the
+  // stack.
+  if (!GS.HasMultipleAccessingFunctions &&
+      GS.AccessingFunction && !GS.HasNonInstructionUser &&
+      GV->getType()->getElementType()->isSingleValueType() &&
+      GS.AccessingFunction->getName() == "main" &&
+      GS.AccessingFunction->hasExternalLinkage() &&
+      GV->getType()->getAddressSpace() == 0) {
+    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
+    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+                                                        ->getEntryBlock().begin());
+    Type *ElemTy = GV->getType()->getElementType();
+    // FIXME: Pass Global's alignment when globals have alignment
+    AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+    if (!isa<UndefValue>(GV->getInitializer()))
+      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+    GV->replaceAllUsesWith(Alloca);
+    GV->eraseFromParent();
+    ++NumLocalized;
+    return true;
+  }
+
   // If the global is never loaded (but may be stored to), it is dead.
   // Delete it now.
   if (!GS.isLoaded) {
-- cgit v1.1

From 1ee3c0008be6f7012aa69f7a73a2819300bdf23f Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Mon, 7 Oct 2013 21:05:43 +0000
Subject: LoopVectorize: External uses must use the last value in a reduction
 cycle

Otherwise, we don't perform operations that would have been performed on the
scalar version.

Fixes PR17498.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192133 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a71df08..415f86e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4015,6 +4015,12 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
       if (ExitInstruction != 0 || Cur == Phi)
         return false;

+      // The instruction used by an outside user must be the last instruction
+      // before we feed back to the reduction phi. Otherwise, we lose VF-1
+      // operations on the value.
+      if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+        return false;
+
       ExitInstruction = Cur;
       continue;
     }
-- cgit v1.1

From e0409098aee1270164a7e453e450264f8e62cbfd Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Wed, 9 Oct 2013 17:21:44 +0000
Subject: Fix a bug in Dead Argument Elimination.

If a function seen at compile time is not necessarily the one linked to
the binary being built, it is illegal to change the actual arguments
passed to it.

e.g.
--------------------------
void foo(int lol) {
  // foo() has linkage satisfying isWeakForLinker()
  // "lol" is not used at all.
}

void bar(int lol2) {
  // xform to foo(undef) is illegal, as the compiler does not know which
  // instance of foo() will be linked to the binary being built.
  foo(lol2);
}
-----------------------------

Such functions can be captured by isWeakForLinker(). NOTE that
mayBeOverridden() is insufficient for this purpose as it doesn't include
linkage types like AvailableExternallyLinkage and LinkOnceODRLinkage.
Take link_odr* as an example: it indicates a set of *EQUIVALENT* globals
that can be merged at link-time. However, the semantics of
*EQUIVALENT*-functions include parameters. Changing parameters breaks
the assumption.

Thanks to John McCall for his help, especially for the explanation of the
subtle difference between linkage types.

rdar://11546243

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192302 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/DeadArgumentElimination.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6ee6162..8621f1a 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -357,6 +357,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
   if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg())
     return false;

+  // If a function seen at compile time is not necessarily the one linked to
+  // the binary being built, it is illegal to change the actual arguments
+  // passed to it. These functions can be captured by isWeakForLinker().
+  // *NOTE* that mayBeOverridden() is insufficient for this purpose as it
+  // doesn't include linkage types like AvailableExternallyLinkage and
+  // LinkOnceODRLinkage. Take link_odr* as an example: it indicates a set of
+  // *EQUIVALENT* globals that can be merged at link-time. However, the
+  // semantics of *EQUIVALENT*-functions include parameters. Changing
+  // parameters breaks the assumption.
+  //
+  if (Fn.isWeakForLinker())
+    return false;
+
   if (Fn.use_empty())
     return false;
-- cgit v1.1

From de2aa608438c1675bb69c2b2087663b9dfcf752b Mon Sep 17 00:00:00 2001
From: Renato Golin
Date: Fri, 11 Oct 2013 16:14:39 +0000
Subject: Better info when debugging vectorizer

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192460 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 415f86e..a34d5b3 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2745,19 +2745,17 @@ bool LoopVectorizationLegality::canVectorize() {
   if (!TheLoop->getExitingBlock())
     return false;

-  unsigned NumBlocks = TheLoop->getNumBlocks();
+  // We need to have a loop header.
+  DEBUG(dbgs() << "LV: Found a loop: " <<
+        TheLoop->getHeader()->getName() << '\n');

   // Check if we can if-convert non single-bb loops.
+  unsigned NumBlocks = TheLoop->getNumBlocks();
   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
     DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
     return false;
   }

-  // We need to have a loop header.
-  BasicBlock *Latch = TheLoop->getLoopLatch();
-  DEBUG(dbgs() << "LV: Found a loop: " <<
-        TheLoop->getHeader()->getName() << '\n');
-
   // ScalarEvolution needs to be able to find the exit count.
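// A hedged sketch of the ScalarEvolution query referenced just above: the
// vectorizer bails out when the backedge-taken count is unknown. Assumes SE
// and TheLoop are valid, as in the surrounding function; the helper name is
// invented for illustration.
static bool hasComputableTripCount(llvm::ScalarEvolution *SE,
                                   llvm::Loop *TheLoop) {
  const llvm::SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
  // getCouldNotCompute() is the sentinel value for "unknown trip count".
  return ExitCount != SE->getCouldNotCompute();
}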
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); if (ExitCount == SE->getCouldNotCompute()) { @@ -2766,6 +2764,7 @@ bool LoopVectorizationLegality::canVectorize() { } // Do not loop-vectorize loops with a tiny trip count. + BasicBlock *Latch = TheLoop->getLoopLatch(); unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); if (TC > 0u && TC < TinyTripCountVectorThreshold) { DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << -- cgit v1.1 From fe82a3e360fc850e7551f2f1f32b58e539182c68 Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Sat, 12 Oct 2013 18:29:15 +0000 Subject: LoopVectorize: Add missing INITIALIZE_PASS_DEPENDENCY macros Contributed-by: Peter Zotov git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a34d5b3..e85d4fc 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4863,7 +4863,10 @@ char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) -- cgit v1.1 From 24732c3363a9a442c14cf236c3de1086cdee6000 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 12 Oct 2013 18:56:27 +0000 Subject: SLPVectorizer: Sort PHINodes based on their opcode Before this patch we relied on the order of phi nodes when we looked for phi nodes of the same type. This could prevent vectorization of cases where there was a phi node of a second type in between phi nodes of some type. This is important for vectorization of an internal graphics kernel. On the test suite + external on x86_64 (and on a run on armv7s) it showed no impact on either performance or compile time. radar://15024459 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 67 ++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 23 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b5a303e..af1c0e7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2366,42 +2366,63 @@ static bool findBuildVector(InsertElementInst *IE, return false; } +static bool PhiTypeSorterFunc(Value *V, Value *V2) { + return V->getType() < V2->getType(); +} + bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool Changed = false; SmallVector Incoming; - SmallSet VisitedInstrs; + SmallSet VisitedInstrs; + + bool HaveVectorizedPhiNodes = true; + while (HaveVectorizedPhiNodes) { + HaveVectorizedPhiNodes = false; + + // Collect the incoming values from the PHIs. + Incoming.clear(); + for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie; + ++instr) { + PHINode *P = dyn_cast(instr); + if (!P) + break; - // Collect the incoming values from the PHIs. 
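// A self-contained sketch of the grouping idea introduced by this patch:
// sort candidate values by their type (pointer comparison is enough, since
// LLVM types are uniqued) so equal types become adjacent, then treat each
// same-type run as one vectorization bundle. Names are illustrative.
#include <algorithm>
#include <vector>
static bool typeLess(llvm::Value *A, llvm::Value *B) {
  return A->getType() < B->getType();
}
static void forEachTypeRun(std::vector<llvm::Value *> &Vals) {
  std::stable_sort(Vals.begin(), Vals.end(), typeLess);
  for (size_t I = 0; I != Vals.size();) {
    size_t J = I;
    while (J != Vals.size() && Vals[J]->getType() == Vals[I]->getType())
      ++J;
    // [I, J) now holds values of a single type; try to vectorize this run.
    I = J;
  }
}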
-  for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
-       ++instr) {
-    PHINode *P = dyn_cast<PHINode>(instr);
+      if (!VisitedInstrs.count(P))
+        Incoming.push_back(P);
+    }

-    if (!P)
-      break;
+    // Sort by type.
+    std::stable_sort(Incoming.begin(), Incoming.end(), PhiTypeSorterFunc);

-    // We may go through BB multiple times so skip the one we have checked.
-    if (!VisitedInstrs.insert(instr))
-      continue;
+    // Try to vectorize elements based on their type.
+    for (SmallVector::iterator IncIt = Incoming.begin(),
+                               E = Incoming.end();
+         IncIt != E;) {

-    // Stop constructing the list when you reach a different type.
-    if (Incoming.size() && P->getType() != Incoming[0]->getType()) {
-      if (tryToVectorizeList(Incoming, R)) {
-        // We would like to start over since some instructions are deleted
-        // and the iterator may become invalid value.
+      // Look for the next elements with the same type.
+      SmallVector::iterator SameTypeIt = IncIt;
+      while (SameTypeIt != E &&
+             (*SameTypeIt)->getType() == (*IncIt)->getType()) {
+        VisitedInstrs.insert(*SameTypeIt);
+        ++SameTypeIt;
+      }
+
+      // Try to vectorize them.
+      unsigned NumElts = (SameTypeIt - IncIt);
+      DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n");
+      if (NumElts > 1 &&
+          tryToVectorizeList(ArrayRef<Value *>(IncIt, NumElts), R)) {
+        // Success; start over because instructions might have been changed.
+        HaveVectorizedPhiNodes = true;
         Changed = true;
-        instr = BB->begin();
-        ie = BB->end();
+        break;
       }
-      Incoming.clear();
+      // Start over at the next instruction of a different type (or the end).
+      IncIt = SameTypeIt;
     }
-  }

-  if (Incoming.size() > 1)
-    Changed |= tryToVectorizeList(Incoming, R);
-
   VisitedInstrs.clear();

   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
-- cgit v1.1

From a0f6d1651b08254982244e18afcdd513ca34a3ca Mon Sep 17 00:00:00 2001
From: Evgeniy Stepanov
Date: Mon, 14 Oct 2013 09:52:09 +0000
Subject: [msan] Fix handling of scalar select of vectors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192575 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Instrumentation/MemorySanitizer.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 65db206..f158cee 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1233,15 +1233,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) {
     Type *srcTy = V->getType();
     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
-      return IRB.CreateIntCast(V, dstTy, false);
+      return IRB.CreateIntCast(V, dstTy, true);
     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
         dstTy->getVectorNumElements() == srcTy->getVectorNumElements())
-      return IRB.CreateIntCast(V, dstTy, false);
+      return IRB.CreateIntCast(V, dstTy, true);
     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
     Value *V2 =
-        IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), false);
+        IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), true);
     return IRB.CreateBitCast(V2, dstTy);
     // TODO: handle struct types.
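// A condensed sketch of the cast strategy used by CreateShadowCast above for
// size-changing vector/scalar shadows: bitcast to an integer of the source's
// exact bit width, resize with an integer cast, then bitcast to the
// destination type. SrcBits and DstBits stand in for the
// VectorOrPrimitiveTypeSizeInBits() results computed by the caller.
llvm::Value *castShadowViaInt(llvm::IRBuilder<> &IRB, llvm::Value *V,
                              llvm::Type *DstTy, unsigned SrcBits,
                              unsigned DstBits, llvm::LLVMContext &C) {
  llvm::Value *AsInt = IRB.CreateBitCast(V, llvm::Type::getIntNTy(C, SrcBits));
  llvm::Value *Resized = IRB.CreateIntCast(
      AsInt, llvm::Type::getIntNTy(C, DstBits), /*isSigned=*/true);
  return IRB.CreateBitCast(Resized, DstTy);
}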
} @@ -1899,7 +1899,7 @@ struct MemorySanitizerVisitor : public InstVisitor { } else { // Sa = (sext Sb) | (select b, Sc, Sd) S = IRB.CreateOr( - S, IRB.CreateSExt(getShadow(I.getCondition()), S->getType()), + S, CreateShadowCast(IRB, getShadow(I.getCondition()), S->getType()), "_msprop_select"); } setShadow(&I, S); -- cgit v1.1 From 95864303f5054c68043febc861764070e8f13913 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 14 Oct 2013 15:16:25 +0000 Subject: [msan] Instrument x86.*_cvt* intrinsics. Currently MSan checks that arguments of *cvt* intrinsics are fully initialized. That's too much to ask: some of them only operate on lower half, or even quarter, of the input register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192599 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 177 +++++++++++++++++---- 1 file changed, 149 insertions(+), 28 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f158cee..88a8d41 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -469,18 +469,21 @@ struct MemorySanitizerVisitor : public InstVisitor { MemorySanitizer &MS; SmallVector ShadowPHINodes, OriginPHINodes; ValueMap ShadowMap, OriginMap; + OwningPtr VAHelper; + + // The following flags disable parts of MSan instrumentation based on + // blacklist contents and command-line options. bool InsertChecks; bool LoadShadow; bool PoisonStack; bool PoisonUndef; bool CheckReturnValue; - OwningPtr VAHelper; struct ShadowOriginAndInsertPoint { - Instruction *Shadow; - Instruction *Origin; + Value *Shadow; + Value *Origin; Instruction *OrigIns; - ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I) + ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I) : Shadow(S), Origin(O), OrigIns(I) { } ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { } }; @@ -521,7 +524,7 @@ struct MemorySanitizerVisitor : public InstVisitor { (void)NewSI; if (ClCheckAccessAddress) - insertCheck(Addr, &I); + insertShadowCheck(Addr, &I); if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering())); @@ -534,11 +537,10 @@ struct MemorySanitizerVisitor : public InstVisitor { } else { Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); - Constant *Cst = dyn_cast_or_null(ConvertedShadow); // TODO(eugenis): handle non-zero constant shadow by inserting an // unconditional check (can not simply fail compilation as this could // be in the dead code). - if (Cst) + if (isa(ConvertedShadow)) continue; Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, @@ -556,12 +558,15 @@ struct MemorySanitizerVisitor : public InstVisitor { void materializeChecks() { for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { - Instruction *Shadow = InstrumentationList[i].Shadow; + Value *Shadow = InstrumentationList[i].Shadow; Instruction *OrigIns = InstrumentationList[i].OrigIns; IRBuilder<> IRB(OrigIns); DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); + // See the comment in materializeStores(). 
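// A one-predicate restatement of the early-exit above, as a sketch: a shadow
// that folds to a compile-time constant needs no runtime branch. Zero means
// provably initialized; a non-zero constant would deserve an unconditional
// report, which the TODO in materializeStores() still leaves unhandled.
static bool needsRuntimeShadowCheck(llvm::Value *ConvertedShadow) {
  return !llvm::isa<llvm::Constant>(ConvertedShadow);
}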
+ if (isa(ConvertedShadow)) + continue; Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = @@ -571,7 +576,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.SetInsertPoint(CheckTerm); if (MS.TrackOrigins) { - Instruction *Origin = InstrumentationList[i].Origin; + Value *Origin = InstrumentationList[i].Origin; IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0), MS.OriginTLS); } @@ -888,20 +893,29 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Remember the place where a shadow check should be inserted. /// /// This location will be later instrumented with a check that will print a - /// UMR warning in runtime if the value is not fully defined. - void insertCheck(Value *Val, Instruction *OrigIns) { - assert(Val); + /// UMR warning in runtime if the shadow value is not 0. + void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) { + assert(Shadow); if (!InsertChecks) return; - Instruction *Shadow = dyn_cast_or_null(getShadow(Val)); - if (!Shadow) return; #ifndef NDEBUG Type *ShadowTy = Shadow->getType(); assert((isa(ShadowTy) || isa(ShadowTy)) && "Can only insert checks for integer and vector shadow types"); #endif - Instruction *Origin = dyn_cast_or_null(getOrigin(Val)); InstrumentationList.push_back( - ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); + ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns)); + } + + /// \brief Remember the place where a shadow check should be inserted. + /// + /// This location will be later instrumented with a check that will print a + /// UMR warning in runtime if the value is not fully defined. + void insertShadowCheck(Value *Val, Instruction *OrigIns) { + assert(Val); + Instruction *Shadow = dyn_cast_or_null(getShadow(Val)); + if (!Shadow) return; + Instruction *Origin = dyn_cast_or_null(getOrigin(Val)); + insertShadowCheck(Shadow, Origin, OrigIns); } AtomicOrdering addReleaseOrdering(AtomicOrdering a) { @@ -958,7 +972,7 @@ struct MemorySanitizerVisitor : public InstVisitor { } if (ClCheckAccessAddress) - insertCheck(I.getPointerOperand(), &I); + insertShadowCheck(I.getPointerOperand(), &I); if (I.isAtomic()) I.setOrdering(addAcquireOrdering(I.getOrdering())); @@ -990,13 +1004,13 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB); if (ClCheckAccessAddress) - insertCheck(Addr, &I); + insertShadowCheck(Addr, &I); // Only test the conditional argument of cmpxchg instruction. // The other argument can potentially be uninitialized, but we can not // detect this situation reliably without possible false positives. if (isa(I)) - insertCheck(I.getOperand(1), &I); + insertShadowCheck(I.getOperand(1), &I); IRB.CreateStore(getCleanShadow(&I), ShadowPtr); @@ -1015,7 +1029,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // Vector manipulation. 
  void visitExtractElementInst(ExtractElementInst &I) {
-    insertCheck(I.getOperand(1), &I);
+    insertShadowCheck(I.getOperand(1), &I);
     IRBuilder<> IRB(&I);
     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
               "_msprop"));
@@ -1023,7 +1037,7 @@
   void visitInsertElementInst(InsertElementInst &I) {
-    insertCheck(I.getOperand(2), &I);
+    insertShadowCheck(I.getOperand(2), &I);
     IRBuilder<> IRB(&I);
     setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
               I.getOperand(2), "_msprop"));
@@ -1031,7 +1045,7 @@
   void visitShuffleVectorInst(ShuffleVectorInst &I) {
-    insertCheck(I.getOperand(2), &I);
+    insertShadowCheck(I.getOperand(2), &I);
     IRBuilder<> IRB(&I);
     setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
               I.getOperand(2), "_msprop"));
@@ -1266,7 +1280,7 @@
   void handleDiv(Instruction &I) {
     IRBuilder<> IRB(&I);
     // Strict on the second argument.
-    insertCheck(I.getOperand(1), &I);
+    insertShadowCheck(I.getOperand(1), &I);
     setShadow(&I, getShadow(&I, 0));
     setOrigin(&I, getOrigin(&I, 0));
   }
@@ -1549,7 +1563,7 @@
     IRB.CreateAlignedStore(Shadow, ShadowPtr, 1);

     if (ClCheckAccessAddress)
-      insertCheck(Addr, &I);
+      insertShadowCheck(Addr, &I);

     // FIXME: use ClStoreCleanOrigin
     // FIXME: factor out common code from materializeStores
@@ -1576,9 +1590,8 @@
       setShadow(&I, getCleanShadow(&I));
     }

-    if (ClCheckAccessAddress)
-      insertCheck(Addr, &I);
+    insertShadowCheck(Addr, &I);

     if (MS.TrackOrigins) {
       if (LoadShadow)
@@ -1675,11 +1688,119 @@
     setOrigin(&I, getOrigin(Op));
   }

+  // \brief Instrument vector convert intrinsic.
+  //
+  // This function instruments intrinsics like cvtsi2ss:
+  // %Out = int_xxx_cvtyyy(%ConvertOp)
+  // or
+  // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
+  // Intrinsic converts \p NumUsedElements elements of \p ConvertOp to the same
+  // number of \p Out elements, and (if it has 2 arguments) copies the rest of
+  // the elements from \p CopyOp.
+  // In most cases conversion involves a floating-point value which may trigger
+  // a hardware exception when not fully initialized. For this reason we
+  // require \p ConvertOp[0:NumUsedElements] to be fully initialized and trap
+  // otherwise.
+  // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
+  // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
+  // return a fully initialized value.
+  void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements) {
+    IRBuilder<> IRB(&I);
+    Value *CopyOp, *ConvertOp;
+
+    switch (I.getNumArgOperands()) {
+    case 2:
+      CopyOp = I.getArgOperand(0);
+      ConvertOp = I.getArgOperand(1);
+      break;
+    case 1:
+      ConvertOp = I.getArgOperand(0);
+      CopyOp = NULL;
+      break;
+    default:
+      llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
+    }
+
+    // The first *NumUsedElements* elements of ConvertOp are converted to the
+    // same number of output elements. The rest of the output is copied from
+    // CopyOp, or (if not available) filled with zeroes.
+    // Combine shadow for elements of ConvertOp that are used in this operation,
+    // and insert a check.
+    // FIXME: consider propagating shadow of ConvertOp, at least in the case of
+    // int->any conversion.
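// A condensed sketch of the element-shadow aggregation implemented below:
// extract the shadow of each used lane and OR them together, so the inserted
// check fires if any used lane carries poisoned bits. Assumes a vector shadow
// and NumUsed >= 1; the helper name is illustrative.
llvm::Value *orUsedLaneShadows(llvm::IRBuilder<> &IRB, llvm::Value *VecShadow,
                               int NumUsed) {
  llvm::Value *Agg = IRB.CreateExtractElement(VecShadow, IRB.getInt32(0));
  for (int i = 1; i < NumUsed; ++i)
    Agg = IRB.CreateOr(Agg, IRB.CreateExtractElement(VecShadow,
                                                     IRB.getInt32(i)));
  return Agg; // non-zero iff some used lane is uninitialized
}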
+ Value *ConvertShadow = getShadow(ConvertOp); + Value *AggShadow = 0; + if (ConvertOp->getType()->isVectorTy()) { + AggShadow = IRB.CreateExtractElement( + ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); + for (int i = 1; i < NumUsedElements; ++i) { + Value *MoreShadow = IRB.CreateExtractElement( + ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i)); + AggShadow = IRB.CreateOr(AggShadow, MoreShadow); + } + } else { + AggShadow = ConvertShadow; + } + assert(AggShadow->getType()->isIntegerTy()); + insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I); + + // Build result shadow by zero-filling parts of CopyOp shadow that come from + // ConvertOp. + if (CopyOp) { + assert(CopyOp->getType() == I.getType()); + assert(CopyOp->getType()->isVectorTy()); + Value *ResultShadow = getShadow(CopyOp); + Type *EltTy = ResultShadow->getType()->getVectorElementType(); + for (int i = 0; i < NumUsedElements; ++i) { + ResultShadow = IRB.CreateInsertElement( + ResultShadow, ConstantInt::getNullValue(EltTy), + ConstantInt::get(IRB.getInt32Ty(), i)); + } + setShadow(&I, ResultShadow); + setOrigin(&I, getOrigin(CopyOp)); + } else { + setShadow(&I, getCleanShadow(&I)); + } + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: handleBswap(I); break; + case llvm::Intrinsic::x86_avx512_cvtsd2usi64: + case llvm::Intrinsic::x86_avx512_cvtsd2usi: + case llvm::Intrinsic::x86_avx512_cvtss2usi64: + case llvm::Intrinsic::x86_avx512_cvtss2usi: + case llvm::Intrinsic::x86_avx512_cvttss2usi64: + case llvm::Intrinsic::x86_avx512_cvttss2usi: + case llvm::Intrinsic::x86_avx512_cvttsd2usi64: + case llvm::Intrinsic::x86_avx512_cvttsd2usi: + case llvm::Intrinsic::x86_avx512_cvtusi2sd: + case llvm::Intrinsic::x86_avx512_cvtusi2ss: + case llvm::Intrinsic::x86_avx512_cvtusi642sd: + case llvm::Intrinsic::x86_avx512_cvtusi642ss: + case llvm::Intrinsic::x86_sse2_cvtsd2si64: + case llvm::Intrinsic::x86_sse2_cvtsd2si: + case llvm::Intrinsic::x86_sse2_cvtsd2ss: + case llvm::Intrinsic::x86_sse2_cvtsi2sd: + case llvm::Intrinsic::x86_sse2_cvtsi642sd: + case llvm::Intrinsic::x86_sse2_cvtss2sd: + case llvm::Intrinsic::x86_sse2_cvttsd2si64: + case llvm::Intrinsic::x86_sse2_cvttsd2si: + case llvm::Intrinsic::x86_sse_cvtsi2ss: + case llvm::Intrinsic::x86_sse_cvtsi642ss: + case llvm::Intrinsic::x86_sse_cvtss2si64: + case llvm::Intrinsic::x86_sse_cvtss2si: + case llvm::Intrinsic::x86_sse_cvttss2si64: + case llvm::Intrinsic::x86_sse_cvttss2si: + handleVectorConvertIntrinsic(I, 1); + break; + case llvm::Intrinsic::x86_sse2_cvtdq2pd: + case llvm::Intrinsic::x86_sse2_cvtps2pd: + case llvm::Intrinsic::x86_sse_cvtps2pi: + case llvm::Intrinsic::x86_sse_cvttps2pi: + handleVectorConvertIntrinsic(I, 2); + break; default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); @@ -1826,7 +1947,7 @@ struct MemorySanitizerVisitor : public InstVisitor { if (!RetVal) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); if (CheckReturnValue) { - insertCheck(RetVal, &I); + insertShadowCheck(RetVal, &I); Value *Shadow = getCleanShadow(RetVal); IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment); } else { @@ -1973,7 +2094,7 @@ struct MemorySanitizerVisitor : public InstVisitor { dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertCheck(I.getOperand(i), &I); + insertShadowCheck(I.getOperand(i), &I); setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } -- cgit v1.1 From 
2a6cbba2db261d2ee29a1373e195f95fd232e61b Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 14 Oct 2013 16:05:55 +0000 Subject: Basic blocks typically have few predecessors. Use a SmallDenseMap to avoid a heap allocation when this is the case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SSAUpdater.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index fc85ef3..30adbfa 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -63,7 +63,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { } static bool IsEquivalentPHI(PHINode *PHI, - DenseMap &ValueMapping) { + SmallDenseMap &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -136,8 +136,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa(BB->begin())) { - DenseMap ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast(It)); ++It) { -- cgit v1.1 From 61bc4be26e777760f3cf2f903d3e6c2a8e2dfcb0 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 14 Oct 2013 16:46:46 +0000 Subject: Remove lib/Transforms/Instrumentation/ProfilingUtils.* They were leftover from the old profiling support. Patch by Alastair Murray. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/CMakeLists.txt | 1 - lib/Transforms/Instrumentation/GCOVProfiling.cpp | 1 - lib/Transforms/Instrumentation/ProfilingUtils.cpp | 169 ---------------------- lib/Transforms/Instrumentation/ProfilingUtils.h | 36 ----- 4 files changed, 207 deletions(-) delete mode 100644 lib/Transforms/Instrumentation/ProfilingUtils.cpp delete mode 100644 lib/Transforms/Instrumentation/ProfilingUtils.h (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 71a0ecd..3563593 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -6,7 +6,6 @@ add_llvm_library(LLVMInstrumentation GCOVProfiling.cpp MemorySanitizer.cpp Instrumentation.cpp - ProfilingUtils.cpp ThreadSanitizer.cpp ) diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 61c53b4..f03b503 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -17,7 +17,6 @@ #define DEBUG_TYPE "insert-gcov-profiling" #include "llvm/Transforms/Instrumentation.h" -#include "ProfilingUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp deleted file mode 100644 index 4b3de6d..0000000 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ /dev/null @@ -1,169 +0,0 @@ -//===- ProfilingUtils.cpp - Helper functions shared by profilers ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is 
distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a few helper functions which are used by profile -// instrumentation code to instrument the code. This allows the profiler pass -// to worry about *what* to insert, and these functions take care of *how* to do -// it. -// -//===----------------------------------------------------------------------===// - -#include "ProfilingUtils.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" - -void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Array, - PointerType *arrayType) { - LLVMContext &Context = MainFn->getContext(); - Type *ArgVTy = - PointerType::getUnqual(Type::getInt8PtrTy(Context)); - PointerType *UIntPtr = arrayType ? arrayType : - Type::getInt32PtrTy(Context); - Module &M = *MainFn->getParent(); - Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), - Type::getInt32Ty(Context), - ArgVTy, UIntPtr, - Type::getInt32Ty(Context), - (Type *)0); - - // This could force argc and argv into programs that wouldn't otherwise have - // them, but instead we just pass null values in. - std::vector Args(4); - Args[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Args[1] = Constant::getNullValue(ArgVTy); - - // Skip over any allocas in the entry block. - BasicBlock *Entry = MainFn->begin(); - BasicBlock::iterator InsertPos = Entry->begin(); - while (isa(InsertPos)) ++InsertPos; - - std::vector GEPIndices(2, - Constant::getNullValue(Type::getInt32Ty(Context))); - unsigned NumElements = 0; - if (Array) { - Args[2] = ConstantExpr::getGetElementPtr(Array, GEPIndices); - NumElements = - cast(Array->getType()->getElementType())->getNumElements(); - } else { - // If this profiling instrumentation doesn't have a constant array, just - // pass null. - Args[2] = ConstantPointerNull::get(UIntPtr); - } - Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - - CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos); - - // If argc or argv are not available in main, just pass null values in. - Function::arg_iterator AI; - switch (MainFn->arg_size()) { - default: - case 2: - AI = MainFn->arg_begin(); ++AI; - if (AI->getType() != ArgVTy) { - Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy, - false); - InitCall->setArgOperand(1, - CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall)); - } else { - InitCall->setArgOperand(1, AI); - } - /* FALL THROUGH */ - - case 1: - AI = MainFn->arg_begin(); - // If the program looked at argc, have it look at the return value of the - // init call instead. 
- if (!AI->getType()->isIntegerTy(32)) { - Instruction::CastOps opcode; - if (!AI->use_empty()) { - opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); - AI->replaceAllUsesWith( - CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos)); - } - opcode = CastInst::getCastOpcode(AI, true, - Type::getInt32Ty(Context), true); - InitCall->setArgOperand(0, - CastInst::Create(opcode, AI, Type::getInt32Ty(Context), - "argc.cast", InitCall)); - } else { - AI->replaceAllUsesWith(InitCall); - InitCall->setArgOperand(0, AI); - } - - case 0: break; - } -} - -void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray, bool beginning) { - // Insert the increment after any alloca or PHI instructions... - BasicBlock::iterator InsertPos = beginning ? BB->getFirstInsertionPt() : - BB->getTerminator(); - while (isa(InsertPos)) - ++InsertPos; - - LLVMContext &Context = BB->getContext(); - - // Create the getelementptr constant expression - std::vector Indices(2); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); - Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, Indices); - - // Load, increment and store the value back. - Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); - Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal, - ConstantInt::get(Type::getInt32Ty(Context), 1), - "NewFuncCounter", InsertPos); - new StoreInst(NewVal, ElementPtr, InsertPos); -} - -void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { - // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those - // types. - Type *GlobalDtorElems[2] = { - Type::getInt32Ty(Mod->getContext()), - FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() - }; - StructType *GlobalDtorElemTy = - StructType::get(Mod->getContext(), GlobalDtorElems, false); - - // Construct the new element we'll be adding. - Constant *Elem[2] = { - ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535), - ConstantExpr::getBitCast(Callee, GlobalDtorElems[1]) - }; - - // If llvm.global_dtors exists, make a copy of the things in its list and - // delete it, to replace it with one that has a larger array type. - std::vector dtors; - if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) { - if (ConstantArray *InitList = - dyn_cast(GlobalDtors->getInitializer())) { - for (unsigned i = 0, e = InitList->getType()->getNumElements(); - i != e; ++i) - dtors.push_back(cast(InitList->getOperand(i))); - } - GlobalDtors->eraseFromParent(); - } - - // Build up llvm.global_dtors with our new item in it. - GlobalVariable *GlobalDtors = new GlobalVariable( - *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, - GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); - - dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem)); - GlobalDtors->setInitializer(ConstantArray::get( - cast(GlobalDtors->getType()->getElementType()), dtors)); -} diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h deleted file mode 100644 index 09b2217..0000000 --- a/lib/Transforms/Instrumentation/ProfilingUtils.h +++ /dev/null @@ -1,36 +0,0 @@ -//===- ProfilingUtils.h - Helper functions shared by profilers --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a few helper functions which are used by profile -// instrumentation code to instrument the code. This allows the profiler pass -// to worry about *what* to insert, and these functions take care of *how* to do -// it. -// -//===----------------------------------------------------------------------===// - -#ifndef PROFILINGUTILS_H -#define PROFILINGUTILS_H - -namespace llvm { - class BasicBlock; - class Function; - class GlobalValue; - class Module; - class PointerType; - - void InsertProfilingInitCall(Function *MainFn, const char *FnName, - GlobalValue *Arr = 0, - PointerType *arrayType = 0); - void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, - GlobalValue *CounterArray, - bool beginning = true); - void InsertProfilingShutdownCall(Function *Callee, Module *Mod); -} - -#endif -- cgit v1.1 From 390ff499f053771cba51a2f42651f126a7e096f7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 15 Oct 2013 05:20:47 +0000 Subject: Remove x86_sse42_crc32_64_8 intrinsic. It has no functional difference from x86_sse42_crc32_32_8 and was not mapped to a clang builtin. I'm not even sure why this form of the instruction is even called out explicitly in the docs. Also add AutoUpgrade support to convert it into the other intrinsic with appropriate trunc and zext. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192672 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a2492d8..c831ddd 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -808,7 +808,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // TODO: Could compute known zero/one bits based on the input. break; } - case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); return 0; -- cgit v1.1 From d45b3c4653dc4b18074b04662b6d0009880214e3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 15 Oct 2013 16:19:54 +0000 Subject: LoopVectorize: Properly reflect PODness in comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192717 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e85d4fc..11fd45e 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -445,7 +445,7 @@ public: MRK_FloatMax }; - /// This POD struct holds information about reduction variables. + /// This struct holds information about reduction variables. struct ReductionDescriptor { ReductionDescriptor() : StartValue(0), LoopExitInstr(0), Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} @@ -482,8 +482,8 @@ public: MinMaxReductionKind MinMaxKind; }; - // This POD struct holds information about the memory runtime legality - // check that a group of pointers do not overlap. + /// This struct holds information about the memory runtime legality + /// check that a group of pointers do not overlap. 
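// A hedged sketch of what such a runtime memcheck conceptually compares (the
// real emission lives elsewhere in LoopVectorize; every name here is
// illustrative): two accessed ranges [StartA, EndA) and [StartB, EndB) are
// disjoint iff one ends at or before the other begins.
llvm::Value *emitNoOverlap(llvm::IRBuilder<> &IRB, llvm::Value *StartA,
                           llvm::Value *EndA, llvm::Value *StartB,
                           llvm::Value *EndB) {
  llvm::Value *ABeforeB = IRB.CreateICmpULE(EndA, StartB);
  llvm::Value *BBeforeA = IRB.CreateICmpULE(EndB, StartA);
  return IRB.CreateOr(ABeforeB, BBeforeA); // true => safe to vectorize
}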
  struct RuntimePointerCheck {
    RuntimePointerCheck() : Need(false) {}
@@ -514,7 +514,7 @@ public:
    SmallVector<unsigned, 2> DependencySetId;
  };

-  /// A POD for saving information about induction variables.
+  /// A struct for saving information about induction variables.
  struct InductionInfo {
    InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
    InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
-- cgit v1.1

From 3386d252579ea00d0fc26a3ba7874bec25ce4516 Mon Sep 17 00:00:00 2001
From: Kostya Serebryany
Date: Wed, 16 Oct 2013 14:06:14 +0000
Subject: [asan] Optimize accesses to global arrays with constant index

Summary:
Given a global array G[N], which is declared in this CU and has static
initializer, avoid instrumenting accesses like G[i], where 'i' is a
constant and 0<=i<N.

 static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
                                cl::Hidden, cl::init(-1));
 static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
                                cl::Hidden, cl::init(-1));

+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumOptimizedAccessesToGlobalArray,
+          "Number of optimized accesses to global arrays");
+STATISTIC(NumOptimizedAccessesToGlobalVar,
+          "Number of optimized accesses to global vars");
+
 namespace {
 /// A set of dynamically initialized globals extracted from metadata.
 class SetOfDynamicallyInitializedGlobals {
@@ -315,6 +323,7 @@ struct AddressSanitizer : public FunctionPass {
   bool ShouldInstrumentGlobal(GlobalVariable *G);
   bool LooksLikeCodeInBug11395(Instruction *I);
   void FindDynamicInitializers(Module &M);
+  bool GlobalIsLinkerInitialized(GlobalVariable *G);

   bool CheckInitOrder;
   bool CheckUseAfterReturn;
@@ -655,6 +664,13 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
   return NULL;
 }

+bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
+  // If a global variable does not have dynamic initialization we don't
+  // have to instrument it. However, if a global does not have initializer
+  // at all, we assume it has dynamic initializer (in other TU).
+  return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+}
+
 void AddressSanitizer::instrumentMop(Instruction *I) {
   bool IsWrite = false;
   Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
@@ -663,13 +679,19 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
   if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
     // If initialization order checking is disabled, a simple access to a
     // dynamically initialized global is always valid.
-    if (!CheckInitOrder)
-      return;
-    // If a global variable does not have dynamic initialization we don't
-    // have to instrument it. However, if a global does not have initailizer
-    // at all, we assume it has dynamic initializer (in other TU).
-    if (G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G))
+    if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) {
+      NumOptimizedAccessesToGlobalVar++;
       return;
+    }
+  }
+  ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
+  if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
+    if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+      if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
+        NumOptimizedAccessesToGlobalArray++;
+        return;
+      }
+    }
   }
 }

@@ -681,6 +703,11 @@ void AddressSanitizer::instrumentMop(Instruction *I) {

   assert((TypeSize % 8) == 0);

+  if (IsWrite)
+    NumInstrumentedWrites++;
+  else
+    NumInstrumentedReads++;
+
   // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
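// A condensed restatement of the new optimization above, as a standalone
// predicate (illustrative only): a GEP constant expression over a
// linker-initialized global with constant in-bounds indices has a fixed,
// valid address, so the access needs no instrumentation.
static bool looksLikeSafeConstantGEP(llvm::Value *Addr) {
  llvm::ConstantExpr *CE = llvm::dyn_cast<llvm::ConstantExpr>(Addr);
  if (!CE || !CE->isGEPWithNoNotionalOverIndexing())
    return false;
  llvm::GlobalVariable *G =
      llvm::dyn_cast<llvm::GlobalVariable>(CE->getOperand(0));
  // The first GEP index must be 0 so the address stays inside the global.
  return G && G->hasInitializer() && CE->getOperand(1)->isNullValue();
}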
if (TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 || TypeSize == 128) -- cgit v1.1 From c4e2060ecc5b74021c5639f7e8b1a063b598feac Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 16 Oct 2013 16:09:00 +0000 Subject: SLPVectorizer: Don't vectorize volatile memory operations radar://15231682 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index af1c0e7..4d82bc4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -786,13 +786,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } case Instruction::Load: { // Check if the loads are consecutive or of we need to swizzle them. - for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1])) { + for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { + LoadInst *L = cast(VL[i]); + if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Need to swizzle loads.\n"); return; } - + } newTreeEntry(VL, true); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; @@ -1911,6 +1912,10 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { if (!SI) continue; + // Don't touch volatile stores. + if (!SI->isSimple()) + continue; + // Check that the pointer points to scalars. Type *Ty = SI->getValueOperand()->getType(); if (Ty->isAggregateType() || Ty->isVectorTy()) -- cgit v1.1 From 48320e0de717f2887912e70677c55f5a9618d872 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 16 Oct 2013 17:19:40 +0000 Subject: Revert "SLPVectorizer: Don't vectorize volatile memory operations" This speculatively reverts commit 192799. It might have broken a linux buildbot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192816 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4d82bc4..af1c0e7 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -786,14 +786,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } case Instruction::Load: { // Check if the loads are consecutive or of we need to swizzle them. - for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { - LoadInst *L = cast(VL[i]); - if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) { + for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) + if (!isConsecutiveAccess(VL[i], VL[i + 1])) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Need to swizzle loads.\n"); return; } - } + newTreeEntry(VL, true); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; @@ -1912,10 +1911,6 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { if (!SI) continue; - // Don't touch volatile stores. - if (!SI->isSimple()) - continue; - // Check that the pointer points to scalars. 
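// A quick sketch of the screening rule this patch adds (helper name is
// illustrative): only "simple" memory operations, neither volatile nor
// atomic, may legally be widened into vector loads and stores.
static bool isVectorizableStore(llvm::Value *V) {
  llvm::StoreInst *SI = llvm::dyn_cast<llvm::StoreInst>(V);
  return SI && SI->isSimple(); // isSimple(): !isVolatile() && !isAtomic()
}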
Type *Ty = SI->getValueOperand()->getType(); if (Ty->isAggregateType() || Ty->isVectorTy()) -- cgit v1.1 From fc1604ec7274e9b724ff9b2512c288a691167427 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Wed, 16 Oct 2013 17:52:40 +0000 Subject: SLPVectorizer: Don't vectorize volatile memory operations radar://15231682 Reapply r192799, http://lab.llvm.org:8011/builders/lldb-x86_64-debian-clang/builds/8226 showed that the bot is still broken even with this out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192820 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index af1c0e7..4d82bc4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -786,13 +786,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } case Instruction::Load: { // Check if the loads are consecutive or of we need to swizzle them. - for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1])) { + for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { + LoadInst *L = cast(VL[i]); + if (!L->isSimple() || !isConsecutiveAccess(VL[i], VL[i + 1])) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Need to swizzle loads.\n"); return; } - + } newTreeEntry(VL, true); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; @@ -1911,6 +1912,10 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { if (!SI) continue; + // Don't touch volatile stores. + if (!SI->isSimple()) + continue; + // Check that the pointer points to scalars. Type *Ty = SI->getValueOperand()->getType(); if (Ty->isAggregateType() || Ty->isVectorTy()) -- cgit v1.1 From 4ef1999d61f955917f86320f3b1c6e3352fd0b49 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 17 Oct 2013 07:20:06 +0000 Subject: tsan: implement no_sanitize_thread attribute If a function has no_sanitize_thread attribute, do not instrument memory accesses in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 2c8a7c4..89fb746 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -358,7 +358,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // (e.g. variables that do not escape, etc). // Instrument memory accesses. - if (ClInstrumentMemoryAccesses) + if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread)) for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); } -- cgit v1.1 From f5e3811607dd54fded0bb6b6ab97345446e086b9 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Thu, 17 Oct 2013 10:53:50 +0000 Subject: [msan] Use zero-extension in shadow cast by default. Switch to sign-extension in r192575 caused 7% perf loss on 482.sphinx3. 
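Roughly, this keeps zero-extension as the cheap default while the one caller
that wants mask-like "all ones" semantics (select instrumentation) opts into
sign-extension explicitly. A minimal sketch of the two kinds of call sites,
using the CreateShadowCast signature from this patch (the value names are
illustrative):

    // Default: inexpensive zero-extension when resizing a shadow.
    Value *Widened = CreateShadowCast(IRB, Shadow, DstTy);
    // Select condition: replicate the condition's shadow across all lanes.
    Value *CondMask = CreateShadowCast(IRB, CondShadow, DstTy, /*Signed=*/true);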
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 88a8d41..5ce88a9 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1244,18 +1244,19 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Cast between two shadow types, extending or truncating as /// necessary. - Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy) { + Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, + bool Signed = false) { Type *srcTy = V->getType(); if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) - return IRB.CreateIntCast(V, dstTy, true); + return IRB.CreateIntCast(V, dstTy, Signed); if (dstTy->isVectorTy() && srcTy->isVectorTy() && dstTy->getVectorNumElements() == srcTy->getVectorNumElements()) - return IRB.CreateIntCast(V, dstTy, true); + return IRB.CreateIntCast(V, dstTy, Signed); size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); Value *V2 = - IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), true); + IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed); return IRB.CreateBitCast(V2, dstTy); // TODO: handle struct types. } @@ -2019,9 +2020,9 @@ struct MemorySanitizerVisitor : public InstVisitor { "_msprop_select_agg"); } else { // Sa = (sext Sb) | (select b, Sc, Sd) - S = IRB.CreateOr( - S, CreateShadowCast(IRB, getShadow(I.getCondition()), S->getType()), - "_msprop_select"); + S = IRB.CreateOr(S, CreateShadowCast(IRB, getShadow(I.getCondition()), + S->getType(), true), + "_msprop_select"); } setShadow(&I, S); if (MS.TrackOrigins) { -- cgit v1.1 From 4a7cef2202893d74caf5aa817aa40d1a67c8de46 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 17 Oct 2013 18:00:25 +0000 Subject: Simplify the interface of AnalyzeGlobal a bit and rename to analyzeGlobal. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192906 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index cf15580..642eb2f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -184,13 +184,8 @@ static bool SafeToDestroyConstant(const Constant *C) { return true; } - -/// AnalyzeGlobal - Look at all uses of the global and fill in the GlobalStatus -/// structure. If the global has its address taken, return true to indicate we -/// can't do anything with it. -/// -static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, - SmallPtrSet &PHIUsers) { +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSet &PHIUsers) { for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) { const User *U = *UI; @@ -201,7 +196,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // know to expect it in various places. Just reject early. 
if (!isa(CE->getType())) return true; - if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; + if (analyzeGlobalAux(CE, GS, PHIUsers)) + return true; } else if (const Instruction *I = dyn_cast(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); @@ -260,16 +256,20 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, } } } else if (isa(I)) { - if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + if (analyzeGlobalAux(I, GS, PHIUsers)) + return true; } else if (isa(I)) { - if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + if (analyzeGlobalAux(I, GS, PHIUsers)) + return true; } else if (isa(I)) { - if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + if (analyzeGlobalAux(I, GS, PHIUsers)) + return true; } else if (const PHINode *PN = dyn_cast(I)) { // PHI nodes we can check just like select or GEP instructions, but we // have to be careful about infinite recursion. if (PHIUsers.insert(PN)) // Not already visited. - if (AnalyzeGlobal(I, GS, PHIUsers)) return true; + if (analyzeGlobalAux(I, GS, PHIUsers)) + return true; } else if (isa(I)) { GS.isCompared = true; } else if (const MemTransferInst *MTI = dyn_cast(I)) { @@ -300,6 +300,15 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, return false; } +/// Look at all uses of the global and fill in the GlobalStatus +/// structure. If the global has its address taken, return true to indicate we +/// can't do anything with it. +/// +static bool analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet PHIUsers; + return analyzeGlobalAux(V, GS, PHIUsers); +} + /// isLeakCheckerRoot - Is this global variable possibly used by a leak checker /// as a root? If so, we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { @@ -1916,10 +1925,9 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (!GV->hasLocalLinkage()) return false; - SmallPtrSet PHIUsers; GlobalStatus GS; - if (AnalyzeGlobal(GV, GS, PHIUsers)) + if (analyzeGlobal(GV, GS)) return false; if (!GS.isCompared && !GV->hasUnnamedAddr()) { -- cgit v1.1 From 9bb874cea257753349854106a994999981290259 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 17 Oct 2013 18:06:32 +0000 Subject: rename SafeToDestroyConstant to isSafeToDestroyConstant and clang-format. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192907 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 642eb2f..7471a6f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -168,17 +168,19 @@ static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) { return (AtomicOrdering)std::max(X, Y); } -/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used -/// by constants itself. Note that constants cannot be cyclic, so this test is -/// pretty easy to implement recursively. +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. 
/// -static bool SafeToDestroyConstant(const Constant *C) { - if (isa(C)) return false; +static bool isSafeToDestroyConstant(const Constant *C) { + if (isa(C)) + return false; for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) if (const Constant *CU = dyn_cast(*UI)) { - if (!SafeToDestroyConstant(CU)) return false; + if (!isSafeToDestroyConstant(CU)) + return false; } else return false; return true; @@ -288,7 +290,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } else if (const Constant *C = dyn_cast(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. - if (!SafeToDestroyConstant(C)) + if (!isSafeToDestroyConstant(C)) return true; } else { GS.HasNonInstructionUser = true; @@ -442,7 +444,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, Changed = true; } } else if (Constant *C = dyn_cast(U)) { - if (SafeToDestroyConstant(C)) { + if (isSafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. Dead.clear(); @@ -542,7 +544,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, } else if (Constant *C = dyn_cast(U)) { // If we have a chain of dead constantexprs or other things dangling from // us, and if they are all dead, nuke them without remorse. - if (SafeToDestroyConstant(C)) { + if (isSafeToDestroyConstant(C)) { C->destroyConstant(); CleanupConstantGlobalUsers(V, Init, TD, TLI); return true; @@ -557,7 +559,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast(V)) - return SafeToDestroyConstant(C); + return isSafeToDestroyConstant(C); Instruction *I = dyn_cast(V); if (!I) return false; -- cgit v1.1 From b75fcecb0ff2f22e79454ce9ed7c246792bdbf0d Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 17 Oct 2013 18:18:52 +0000 Subject: Rename fields of GlobalStatus to match the coding style. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192910 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 84 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 43 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7471a6f..7b2110f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -99,38 +99,36 @@ ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } namespace { -/// GlobalStatus - As we analyze each global, keep track of some information -/// about it. If we find out that the address of the global is taken, none of -/// this info will be accurate. +/// As we analyze each global, keep track of some information about it. If we +/// find out that the address of the global is taken, none of this info will be +/// accurate. struct GlobalStatus { - /// isCompared - True if the global's address is used in a comparison. - bool isCompared; + /// True if the global's address is used in a comparison. + bool IsCompared; - /// isLoaded - True if the global is ever loaded. If the global isn't ever - /// loaded it can be deleted. - bool isLoaded; + /// True if the global is ever loaded. If the global isn't ever loaded it can + /// be deleted. + bool IsLoaded; - /// StoredType - Keep track of what stores to the global look like. + /// Keep track of what stores to the global look like. 
/// enum StoredType { - /// NotStored - There is no store to this global. It can thus be marked - /// constant. + /// There is no store to this global. It can thus be marked constant. NotStored, - /// isInitializerStored - This global is stored to, but the only thing - /// stored is the constant it was initialized with. This is only tracked - /// for scalar globals. - isInitializerStored, + /// This global is stored to, but the only thing stored is the constant it + /// was initialized with. This is only tracked for scalar globals. + InitializerStored, - /// isStoredOnce - This global is stored to, but only its initializer and - /// one other value is ever stored to it. If this global isStoredOnce, we - /// track the value stored to it in StoredOnceValue below. This is only - /// tracked for scalar globals. - isStoredOnce, + /// This global is stored to, but only its initializer and one other value + /// is ever stored to it. If this global StoredOnce, we track the value + /// stored to it in StoredOnceValue below. This is only tracked for scalar + /// globals. + StoredOnce, - /// isStored - This global is stored to by multiple values or something else - /// that we cannot track. - isStored + /// This global is stored to by multiple values or something else that we + /// cannot track. + Stored } StoredType; /// StoredOnceValue - If only one value (besides the initializer constant) is @@ -151,7 +149,7 @@ struct GlobalStatus { /// AtomicOrdering - Set to the strongest atomic ordering requirement. AtomicOrdering Ordering; - GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored), + GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), StoredOnceValue(0), AccessingFunction(0), HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), Ordering(NotAtomic) {} @@ -209,7 +207,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, GS.HasMultipleAccessingFunctions = true; } if (const LoadInst *LI = dyn_cast(I)) { - GS.isLoaded = true; + GS.IsLoaded = true; // Don't hack on volatile loads. if (LI->isVolatile()) return true; GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering()); @@ -225,7 +223,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, // If this is a direct store to the global (i.e., the global is a scalar // value, not an aggregate), keep more specific information about // stores. 
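// Note that the renamed enumerators keep their declaration order, which
// forms a small lattice:
//
//   NotStored < InitializerStored < StoredOnce < Stored
//
// The analysis only ever moves a global up this lattice, which is why the
// ordered comparisons below, such as
//   GS.StoredType < GlobalStatus::StoredOnce
// stay valid after the rename.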
- if (GS.StoredType != GlobalStatus::isStored) { + if (GS.StoredType != GlobalStatus::Stored) { if (const GlobalVariable *GV = dyn_cast( SI->getOperand(1))) { Value *StoredVal = SI->getOperand(0); @@ -238,23 +236,23 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } if (StoredVal == GV->getInitializer()) { - if (GS.StoredType < GlobalStatus::isInitializerStored) - GS.StoredType = GlobalStatus::isInitializerStored; + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; } else if (isa(StoredVal) && cast(StoredVal)->getOperand(0) == GV) { - if (GS.StoredType < GlobalStatus::isInitializerStored) - GS.StoredType = GlobalStatus::isInitializerStored; - } else if (GS.StoredType < GlobalStatus::isStoredOnce) { - GS.StoredType = GlobalStatus::isStoredOnce; + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; GS.StoredOnceValue = StoredVal; - } else if (GS.StoredType == GlobalStatus::isStoredOnce && + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue == StoredVal) { // noop. } else { - GS.StoredType = GlobalStatus::isStored; + GS.StoredType = GlobalStatus::Stored; } } else { - GS.StoredType = GlobalStatus::isStored; + GS.StoredType = GlobalStatus::Stored; } } } else if (isa(I)) { @@ -273,17 +271,17 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (analyzeGlobalAux(I, GS, PHIUsers)) return true; } else if (isa(I)) { - GS.isCompared = true; + GS.IsCompared = true; } else if (const MemTransferInst *MTI = dyn_cast(I)) { if (MTI->isVolatile()) return true; if (MTI->getArgOperand(0) == V) - GS.StoredType = GlobalStatus::isStored; + GS.StoredType = GlobalStatus::Stored; if (MTI->getArgOperand(1) == V) - GS.isLoaded = true; + GS.IsLoaded = true; } else if (const MemSetInst *MSI = dyn_cast(I)) { assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); if (MSI->isVolatile()) return true; - GS.StoredType = GlobalStatus::isStored; + GS.StoredType = GlobalStatus::Stored; } else { return true; // Any other non-load instruction might take address! } @@ -1932,7 +1930,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (analyzeGlobal(GV, GS)) return false; - if (!GS.isCompared && !GV->hasUnnamedAddr()) { + if (!GS.IsCompared && !GV->hasUnnamedAddr()) { GV->setUnnamedAddr(true); NumUnnamed++; } @@ -1979,7 +1977,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. - if (!GS.isLoaded) { + if (!GS.IsLoaded) { DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); bool Changed; @@ -2000,7 +1998,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, } return Changed; - } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { + } else if (GS.StoredType <= GlobalStatus::InitializerStored) { DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); GV->setConstant(true); @@ -2023,7 +2021,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GVI = FirstNewGV; // Don't skip the newly produced globals! 
return true; } - } else if (GS.StoredType == GlobalStatus::isStoredOnce) { + } else if (GS.StoredType == GlobalStatus::StoredOnce) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the -- cgit v1.1 From fe16848601bdde6e3a5e0860199169dd171222a4 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 18 Oct 2013 23:38:13 +0000 Subject: Mark some command line flags as hidden git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index b9660fa..0017c1b 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -29,7 +29,7 @@ using namespace llvm; static cl::opt -RunLoopVectorization("vectorize-loops", +RunLoopVectorization("vectorize-loops", cl::Hidden, cl::desc("Run the Loop vectorization passes")); static cl::opt @@ -38,11 +38,11 @@ LateVectorization("late-vectorize", cl::init(true), cl::Hidden, "pipeline (after the inliner)")); static cl::opt -RunSLPVectorization("vectorize-slp", +RunSLPVectorization("vectorize-slp", cl::Hidden, cl::desc("Run the SLP vectorization passes")); static cl::opt -RunBBVectorization("vectorize-slp-aggressive", +RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, cl::desc("Run the BB vectorization passes")); static cl::opt -- cgit v1.1 From d5b7f2b62cdb3a14162c57e27c06a27dda9a78c4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 19 Oct 2013 11:27:12 +0000 Subject: Perform an intelligent splice of the predecessor with the single successor. If the predecessor's being spliced into a landing pad, then we need the PHIs to come first and the rest of the predecessor's code to come *after* the landing pad instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193035 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/Local.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 82b8da3..78217c8 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -503,7 +503,19 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); - DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // First splice over the PHI nodes. + BasicBlock::iterator PI = PredBB->begin(); + while (isa(PI)) + ++PI; + + if (PI != PredBB->begin()) + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList(), + PredBB->begin(), PI); + + // Now splice over the rest of the instructions. + DestBB->getInstList().splice(DestBB->getFirstInsertionPt(), + PredBB->getInstList(), PI, PredBB->end()); if (P) { DominatorTree *DT = P->getAnalysisIfAvailable(); @@ -513,6 +525,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { DT->eraseNode(PredBB); } } + // Nuke BB. 
PredBB->eraseFromParent(); } -- cgit v1.1 From 0b5fad68b296087b4d0132dcd19690d2cdce2c77 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Sun, 20 Oct 2013 07:04:37 +0000 Subject: Teach simplify-cfg how to correctly create covered lookup tables for switches on iN with N >= 3. One optimization simplify-cfg performs is converting switches to lookup tables if the switch has > 4 cases. This is done by: 1. Finding the max/min case value and calculating the switch case range. 2. Creating a lookup table basic block. 3. Performing a check in the switch's BB to see if the input value is in the switch's case range. If the input value satisfies said predicate, branch to the lookup table BB; otherwise branch to the switch's default destination BB using the default value as the result. The conditional check consists of subtracting the min case value of the table from any input iN value and then ensuring that said value is unsigned less than the size of the lookup table represented as an iN value. If the lookup table is a covered lookup table, the size of the table will be 2^N, which is 0 as an iN value. Thus the comparison will be an `icmp ult` of an iN value against 0, which is always false, yielding the incorrect result. This patch fixes the problem by recognizing when we have a covered lookup table and, in that case, branching to the lookup table BB unconditionally, since the covering property of the table implies that every possible input value is handled by that BB. rdar://15268442 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193045 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 0e56904..ad3b92a 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3734,14 +3734,30 @@ static bool SwitchToLookupTable(SwitchInst *SI, CommonDest->getParent(), CommonDest); - // Check whether the condition value is within the case range, and branch to - // the new BB. + // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); - Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( - MinCaseVal->getType(), TableSize)); - Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Compute the maximum table size representable by the integer type we are + // switching upon. + const unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + const uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + assert(MaxTableSize >= TableSize && + "It is impossible for a switch to have more entries than the max " + "representable value of its input integer type's size."); + + // If we have a covered lookup table, unconditionally branch to the lookup table + // BB. Otherwise, check if the condition value is within the case range. If it + // is so, branch to the new BB. Otherwise branch to SI's default destination. + const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; + if (GeneratingCoveredLookupTable) { + Builder.CreateBr(LookupBB); + } else { + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + } // Populate the BB that does the lookups.
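// A minimal standalone sketch of the covered-table test described in the
// commit message above (for illustration only; assumes the table size and
// the case type's bit width are already known). A switch on iN that covers
// all 2^N input values has TableSize == MaxTableSize, and 2^N truncated to
// iN is 0, so the icmp ult guard must give way to an unconditional branch.
#include <cstdint>
static bool isCoveredLookupTable(uint64_t TableSize, unsigned CaseSizeInBits) {
  // Saturate at 64 bits; 1ULL << 64 would be undefined behaviour.
  uint64_t MaxTableSize =
      CaseSizeInBits > 63 ? UINT64_MAX : 1ULL << CaseSizeInBits;
  return MaxTableSize == TableSize;
}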
Builder.SetInsertPoint(LookupBB); @@ -3772,7 +3788,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Remove the switch. for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { BasicBlock *Succ = SI->getSuccessor(i); - if (Succ == SI->getDefaultDest()) continue; + + // If we are not generating a covered lookup table, we will have a + // conditional branch from SI's parent BB to SI's default destination if our + // input value lies outside of our case range. Thus in that case leave the + // default destination BB as a predecessor of SI's parent BB. + if (Succ == SI->getDefaultDest() && !GeneratingCoveredLookupTable) + continue; Succ->removePredecessor(SI->getParent()); } SI->eraseFromParent(); -- cgit v1.1 From 3e033f29239e48c190f29cdf3a02cdfbaf2fe72b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 21 Oct 2013 04:09:17 +0000 Subject: Don't eliminate a partially redundant load if it's in a landing pad. A landing pad can be jumped to only by the unwind edge of an invoke instruction. If we eliminate a partially redundant load in a landing pad, it will create a basic block that violates this constraint. It then leads to other problems down the line if it tries to merge that basic block with the landing pad. Avoid this by not eliminating the load in a landing pad. PR17621 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193064 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/JumpThreading.cpp | 7 ++++++- lib/Transforms/Utils/Local.cpp | 15 +-------------- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 0b8906d..b3ec2fc 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -827,7 +827,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; } - /// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant /// load instruction, eliminate it by replacing it with a PHI node. This is an /// important optimization that encourages jump threading, and needs to be run @@ -842,6 +841,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (LoadBB->getSinglePredecessor()) return false; + // If the load is defined in a landing pad, it can't be partially redundant, + // because the edges between the invoke and the landing pad cannot have other + // instructions between them. + if (LoadBB->isLandingPad()) + return false; + Value *LoadedPtr = LI->getOperand(0); // If the loaded operand is defined in the LoadBB, it can't be available. diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 78217c8..82b8da3 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -503,19 +503,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. PredBB->getTerminator()->eraseFromParent(); - - // First splice over the PHI nodes. - BasicBlock::iterator PI = PredBB->begin(); - while (isa(PI)) - ++PI; - - if (PI != PredBB->begin()) - DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList(), - PredBB->begin(), PI); - - // Now splice over the rest of the instructions. 
- DestBB->getInstList().splice(DestBB->getFirstInsertionPt(), - PredBB->getInstList(), PI, PredBB->end()); + DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); if (P) { DominatorTree *DT = P->getAnalysisIfAvailable(); @@ -525,7 +513,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { DT->eraseNode(PredBB); } } - // Nuke BB. PredBB->eraseFromParent(); } -- cgit v1.1 From 6701bb7283747b00bc814b1db784fdbbe03644a1 Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Mon, 21 Oct 2013 05:20:11 +0000 Subject: Fix the predecessor removal logic in r193045. Additionally some small comment/stylistic fixes are included as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193068 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index ad3b92a..61c44fc 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3741,18 +3741,20 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Compute the maximum table size representable by the integer type we are // switching upon. - const unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); - const uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; + unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); + uint64_t MaxTableSize = CaseSize > 63? UINT64_MAX : 1ULL << CaseSize; assert(MaxTableSize >= TableSize && "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); - // If we have a covered lookup table, unconditionally branch to the lookup table - // BB. Otherwise, check if the condition value is within the case range. If it - // is so, branch to the new BB. Otherwise branch to SI's default destination. + // If we have a fully covered lookup table, unconditionally branch to the + // lookup table BB. Otherwise, check if the condition value is within the case + // range. If it is so, branch to the new BB. Otherwise branch to SI's default + // destination. const bool GeneratingCoveredLookupTable = MaxTableSize == TableSize; if (GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); + SI->getDefaultDest()->removePredecessor(SI->getParent()); } else { Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( MinCaseVal->getType(), TableSize)); @@ -3786,14 +3788,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, Builder.CreateBr(CommonDest); // Remove the switch. - for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); - // If we are not generating a covered lookup table, we will have a - // conditional branch from SI's parent BB to SI's default destination if our - // input value lies outside of our case range. Thus in that case leave the - // default destination BB as a predecessor of SI's parent BB. - if (Succ == SI->getDefaultDest() && !GeneratingCoveredLookupTable) + if (Succ == SI->getDefaultDest()) continue; Succ->removePredecessor(SI->getParent()); } -- cgit v1.1 From 713cab059ebb67c2f51d8da9d8e57be2b1dcd9c2 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 21 Oct 2013 17:14:55 +0000 Subject: Optimize more linkonce_odr values during LTO. 
When a linkonce_odr value that is on the dso list is not unnamed_addr we can still look to see if anything is actually using its address. If not, it is safe to hide it. This patch implements that by moving GlobalStatus to Transforms/Utils and using it in Internalize. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193090 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 210 +--------------------------------- lib/Transforms/IPO/Internalize.cpp | 21 +++- lib/Transforms/Utils/CMakeLists.txt | 1 + lib/Transforms/Utils/GlobalStatus.cpp | 178 ++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 210 deletions(-) create mode 100644 lib/Transforms/Utils/GlobalStatus.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7b2110f..74ed4e2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include using namespace llvm; @@ -60,7 +61,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { - struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -99,214 +99,8 @@ ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } namespace { -/// As we analyze each global, keep track of some information about it. If we -/// find out that the address of the global is taken, none of this info will be -/// accurate. -struct GlobalStatus { - /// True if the global's address is used in a comparison. - bool IsCompared; - /// True if the global is ever loaded. If the global isn't ever loaded it can - /// be deleted. - bool IsLoaded; - /// Keep track of what stores to the global look like. - /// - enum StoredType { - /// There is no store to this global. It can thus be marked constant. - NotStored, - - /// This global is stored to, but the only thing stored is the constant it - /// was initialized with. This is only tracked for scalar globals. - InitializerStored, - - /// This global is stored to, but only its initializer and one other value - /// is ever stored to it. If this global StoredOnce, we track the value - /// stored to it in StoredOnceValue below. This is only tracked for scalar - /// globals. - StoredOnce, - - /// This global is stored to by multiple values or something else that we - /// cannot track. - Stored - } StoredType; - - /// StoredOnceValue - If only one value (besides the initializer constant) is - /// ever stored to this global, keep track of what value it is. - Value *StoredOnceValue; - - /// AccessingFunction/HasMultipleAccessingFunctions - These start out - /// null/false. When the first accessing function is noticed, it is recorded. - /// When a second different accessing function is noticed, - /// HasMultipleAccessingFunctions is set to true. - const Function *AccessingFunction; - bool HasMultipleAccessingFunctions; - - /// HasNonInstructionUser - Set to true if this global has a user that is not - /// an instruction (e.g. a constant expr or GV initializer). - bool HasNonInstructionUser; - - /// AtomicOrdering - Set to the strongest atomic ordering requirement. 
- AtomicOrdering Ordering; - - GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), - StoredOnceValue(0), AccessingFunction(0), - HasMultipleAccessingFunctions(false), - HasNonInstructionUser(false), Ordering(NotAtomic) {} -}; - -} - -/// StrongerOrdering - Return the stronger of the two ordering. If the two -/// orderings are acquire and release, then return AcquireRelease. -/// -static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) { - if (X == Acquire && Y == Release) return AcquireRelease; - if (Y == Acquire && X == Release) return AcquireRelease; - return (AtomicOrdering)std::max(X, Y); -} - -/// It is safe to destroy a constant iff it is only used by constants itself. -/// Note that constants cannot be cyclic, so this test is pretty easy to -/// implement recursively. -/// -static bool isSafeToDestroyConstant(const Constant *C) { - if (isa(C)) - return false; - - for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; - ++UI) - if (const Constant *CU = dyn_cast(*UI)) { - if (!isSafeToDestroyConstant(CU)) - return false; - } else - return false; - return true; -} - -static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, - SmallPtrSet &PHIUsers) { - for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) { - const User *U = *UI; - if (const ConstantExpr *CE = dyn_cast(U)) { - GS.HasNonInstructionUser = true; - - // If the result of the constantexpr isn't pointer type, then we won't - // know to expect it in various places. Just reject early. - if (!isa(CE->getType())) return true; - - if (analyzeGlobalAux(CE, GS, PHIUsers)) - return true; - } else if (const Instruction *I = dyn_cast(U)) { - if (!GS.HasMultipleAccessingFunctions) { - const Function *F = I->getParent()->getParent(); - if (GS.AccessingFunction == 0) - GS.AccessingFunction = F; - else if (GS.AccessingFunction != F) - GS.HasMultipleAccessingFunctions = true; - } - if (const LoadInst *LI = dyn_cast(I)) { - GS.IsLoaded = true; - // Don't hack on volatile loads. - if (LI->isVolatile()) return true; - GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering()); - } else if (const StoreInst *SI = dyn_cast(I)) { - // Don't allow a store OF the address, only stores TO the address. - if (SI->getOperand(0) == V) return true; - - // Don't hack on volatile stores. - if (SI->isVolatile()) return true; - - GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering()); - - // If this is a direct store to the global (i.e., the global is a scalar - // value, not an aggregate), keep more specific information about - // stores. - if (GS.StoredType != GlobalStatus::Stored) { - if (const GlobalVariable *GV = dyn_cast( - SI->getOperand(1))) { - Value *StoredVal = SI->getOperand(0); - - if (Constant *C = dyn_cast(StoredVal)) { - if (C->isThreadDependent()) { - // The stored value changes between threads; don't track it. - return true; - } - } - - if (StoredVal == GV->getInitializer()) { - if (GS.StoredType < GlobalStatus::InitializerStored) - GS.StoredType = GlobalStatus::InitializerStored; - } else if (isa(StoredVal) && - cast(StoredVal)->getOperand(0) == GV) { - if (GS.StoredType < GlobalStatus::InitializerStored) - GS.StoredType = GlobalStatus::InitializerStored; - } else if (GS.StoredType < GlobalStatus::StoredOnce) { - GS.StoredType = GlobalStatus::StoredOnce; - GS.StoredOnceValue = StoredVal; - } else if (GS.StoredType == GlobalStatus::StoredOnce && - GS.StoredOnceValue == StoredVal) { - // noop. 
- } else { - GS.StoredType = GlobalStatus::Stored; - } - } else { - GS.StoredType = GlobalStatus::Stored; - } - } - } else if (isa(I)) { - if (analyzeGlobalAux(I, GS, PHIUsers)) - return true; - } else if (isa(I)) { - if (analyzeGlobalAux(I, GS, PHIUsers)) - return true; - } else if (isa(I)) { - if (analyzeGlobalAux(I, GS, PHIUsers)) - return true; - } else if (const PHINode *PN = dyn_cast(I)) { - // PHI nodes we can check just like select or GEP instructions, but we - // have to be careful about infinite recursion. - if (PHIUsers.insert(PN)) // Not already visited. - if (analyzeGlobalAux(I, GS, PHIUsers)) - return true; - } else if (isa(I)) { - GS.IsCompared = true; - } else if (const MemTransferInst *MTI = dyn_cast(I)) { - if (MTI->isVolatile()) return true; - if (MTI->getArgOperand(0) == V) - GS.StoredType = GlobalStatus::Stored; - if (MTI->getArgOperand(1) == V) - GS.IsLoaded = true; - } else if (const MemSetInst *MSI = dyn_cast(I)) { - assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); - if (MSI->isVolatile()) return true; - GS.StoredType = GlobalStatus::Stored; - } else { - return true; // Any other non-load instruction might take address! - } - } else if (const Constant *C = dyn_cast(U)) { - GS.HasNonInstructionUser = true; - // We might have a dead and dangling constant hanging off of here. - if (!isSafeToDestroyConstant(C)) - return true; - } else { - GS.HasNonInstructionUser = true; - // Otherwise must be some other user. - return true; - } - } - - return false; -} - -/// Look at all uses of the global and fill in the GlobalStatus -/// structure. If the global has its address taken, return true to indicate we -/// can't do anything with it. -/// -static bool analyzeGlobal(const Value *V, GlobalStatus &GS) { - SmallPtrSet PHIUsers; - return analyzeGlobalAux(V, GS, PHIUsers); } /// isLeakCheckerRoot - Is this global variable possibly used by a leak checker @@ -1927,7 +1721,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, GlobalStatus GS; - if (analyzeGlobal(GV, GS)) + if (GlobalStatus::analyzeGlobal(GV, GS)) return false; if (!GS.IsCompared && !GV->hasUnnamedAddr()) { diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index f20a7bd..e615918 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -11,6 +11,19 @@ // If the function or variable is not in the list of external names given to // the pass it is marked as internal. // +// This transformation would not be legal or profitable in a regular +// compilation, but it gets extra information from the linker about what is safe +// or profitable. +// +// As an example of a normally illegal transformation: Internalizing a function +// with external linkage. Only if we are told it is only used from within this +// module, it is safe to do it. +// +// On the profitability side: It is always legal to internalize a linkonce_odr +// whose address is not used. Doing so normally would introduce code bloat, but +// if we are told by the linker that the only use of this would be for a +// DSO symbol table, it is profitable to hide it. 
+// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "internalize" @@ -23,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -142,8 +156,11 @@ static bool shouldInternalize(const GlobalValue &GV, if (GV.hasUnnamedAddr()) return true; - // FIXME: Check if the address is used. - return false; + GlobalStatus GS; + if (GlobalStatus::analyzeGlobal(&GV, GS)) + return false; + + return !GS.IsCompared; } bool InternalizePass::runOnModule(Module &M) { diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 3648fd6..5afd6b8 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMTransformUtils CmpInstAnalysis.cpp CodeExtractor.cpp DemoteRegToStack.cpp + GlobalStatus.cpp InlineFunction.cpp InstructionNamer.cpp IntegerDivision.cpp diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp new file mode 100644 index 0000000..8fb79aa --- /dev/null +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -0,0 +1,178 @@ +//===-- GlobalStatus.cpp - Compute status info for globals -----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Return the stronger of the two ordering. If the two orderings are acquire +/// and release, then return AcquireRelease. +/// +static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) { + if (X == Acquire && Y == Release) + return AcquireRelease; + if (Y == Acquire && X == Release) + return AcquireRelease; + return (AtomicOrdering)std::max(X, Y); +} + +/// It is safe to destroy a constant iff it is only used by constants itself. +/// Note that constants cannot be cyclic, so this test is pretty easy to +/// implement recursively. +/// +bool llvm::isSafeToDestroyConstant(const Constant *C) { + if (isa(C)) + return false; + + for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; + ++UI) + if (const Constant *CU = dyn_cast(*UI)) { + if (!isSafeToDestroyConstant(CU)) + return false; + } else + return false; + return true; +} + +static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, + SmallPtrSet &PhiUsers) { + for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast(U)) { + GS.HasNonInstructionUser = true; + + // If the result of the constantexpr isn't pointer type, then we won't + // know to expect it in various places. Just reject early. 
+ if (!isa(CE->getType())) + return true; + + if (analyzeGlobalAux(CE, GS, PhiUsers)) + return true; + } else if (const Instruction *I = dyn_cast(U)) { + if (!GS.HasMultipleAccessingFunctions) { + const Function *F = I->getParent()->getParent(); + if (GS.AccessingFunction == 0) + GS.AccessingFunction = F; + else if (GS.AccessingFunction != F) + GS.HasMultipleAccessingFunctions = true; + } + if (const LoadInst *LI = dyn_cast(I)) { + GS.IsLoaded = true; + // Don't hack on volatile loads. + if (LI->isVolatile()) + return true; + GS.Ordering = strongerOrdering(GS.Ordering, LI->getOrdering()); + } else if (const StoreInst *SI = dyn_cast(I)) { + // Don't allow a store OF the address, only stores TO the address. + if (SI->getOperand(0) == V) + return true; + + // Don't hack on volatile stores. + if (SI->isVolatile()) + return true; + + GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering()); + + // If this is a direct store to the global (i.e., the global is a scalar + // value, not an aggregate), keep more specific information about + // stores. + if (GS.StoredType != GlobalStatus::Stored) { + if (const GlobalVariable *GV = + dyn_cast(SI->getOperand(1))) { + Value *StoredVal = SI->getOperand(0); + + if (Constant *C = dyn_cast(StoredVal)) { + if (C->isThreadDependent()) { + // The stored value changes between threads; don't track it. + return true; + } + } + + if (StoredVal == GV->getInitializer()) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (isa(StoredVal) && + cast(StoredVal)->getOperand(0) == GV) { + if (GS.StoredType < GlobalStatus::InitializerStored) + GS.StoredType = GlobalStatus::InitializerStored; + } else if (GS.StoredType < GlobalStatus::StoredOnce) { + GS.StoredType = GlobalStatus::StoredOnce; + GS.StoredOnceValue = StoredVal; + } else if (GS.StoredType == GlobalStatus::StoredOnce && + GS.StoredOnceValue == StoredVal) { + // noop. + } else { + GS.StoredType = GlobalStatus::Stored; + } + } else { + GS.StoredType = GlobalStatus::Stored; + } + } + } else if (isa(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa(I)) { + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (const PHINode *PN = dyn_cast(I)) { + // PHI nodes we can check just like select or GEP instructions, but we + // have to be careful about infinite recursion. + if (PhiUsers.insert(PN)) // Not already visited. + if (analyzeGlobalAux(I, GS, PhiUsers)) + return true; + } else if (isa(I)) { + GS.IsCompared = true; + } else if (const MemTransferInst *MTI = dyn_cast(I)) { + if (MTI->isVolatile()) + return true; + if (MTI->getArgOperand(0) == V) + GS.StoredType = GlobalStatus::Stored; + if (MTI->getArgOperand(1) == V) + GS.IsLoaded = true; + } else if (const MemSetInst *MSI = dyn_cast(I)) { + assert(MSI->getArgOperand(0) == V && "Memset only takes one pointer!"); + if (MSI->isVolatile()) + return true; + GS.StoredType = GlobalStatus::Stored; + } else { + return true; // Any other non-load instruction might take address! + } + } else if (const Constant *C = dyn_cast(U)) { + GS.HasNonInstructionUser = true; + // We might have a dead and dangling constant hanging off of here. + if (!isSafeToDestroyConstant(C)) + return true; + } else { + GS.HasNonInstructionUser = true; + // Otherwise must be some other user. 
+ return true; + } + } + + return false; +} + +bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { + SmallPtrSet PhiUsers; + return analyzeGlobalAux(V, GS, PhiUsers); +} + +GlobalStatus::GlobalStatus() + : IsCompared(false), IsLoaded(false), StoredType(NotStored), + StoredOnceValue(0), AccessingFunction(0), + HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), + Ordering(NotAtomic) {} -- cgit v1.1 From 79de3d7b3aae9c7cc1038a3223dc96dbdafbeb3f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 21 Oct 2013 18:55:08 +0000 Subject: Teach SimplifyCFG about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193104 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 61c44fc..0833286 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -475,9 +475,13 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { CV = ICI->getOperand(0); // Unwrap any lossless ptrtoint cast. - if (TD && CV && CV->getType() == TD->getIntPtrType(CV->getContext())) - if (PtrToIntInst *PTII = dyn_cast(CV)) - CV = PTII->getOperand(0); + if (TD && CV) { + if (PtrToIntInst *PTII = dyn_cast(CV)) { + Value *Ptr = PTII->getPointerOperand(); + if (PTII->getType() == TD->getIntPtrType(Ptr->getType())) + CV = Ptr; + } + } return CV; } @@ -925,7 +929,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); - CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), + CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getType()), "magicptr"); } @@ -2788,7 +2792,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, - TD->getIntPtrType(CompVal->getContext()), + TD->getIntPtrType(CompVal->getType()), "magicptr"); } -- cgit v1.1 From 244d24597497c09ab68969c8bbbdf2576130262c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 21 Oct 2013 19:43:56 +0000 Subject: Use more type helper functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193109 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 6 ++--- lib/Transforms/Scalar/GVN.cpp | 2 +- lib/Transforms/Vectorize/BBVectorize.cpp | 36 ++++++++++++++++-------------- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 74ed4e2..82a59ed 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1175,8 +1175,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. 
- StructType *ST = - cast(cast(PN->getType())->getElementType()); + StructType *ST = cast(PN->getType()->getPointerElementType()); PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), @@ -2013,8 +2012,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, CSVals[1] = 0; StructType *StructTy = - cast ( - cast(GCL->getType()->getElementType())->getElementType()); + cast(GCL->getType()->getElementType()->getArrayElementType()); // Create the new init list. std::vector CAList; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index fefc8ad..aa4e185 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1172,7 +1172,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); DestPTy = PointerType::get(DestPTy, - cast(PtrVal->getType())->getAddressSpace()); + PtrVal->getType()->getPointerAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); LoadInst *NewLoad = Builder.CreateLoad(PtrVal); diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 9a6a0e6..c5e1dcb 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -625,10 +625,10 @@ namespace { ConstantInt *IntOff = ConstOffSCEV->getValue(); int64_t Offset = IntOff->getSExtValue(); - Type *VTy = cast(IPtr->getType())->getElementType(); + Type *VTy = IPtr->getType()->getPointerElementType(); int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); - Type *VTy2 = cast(JPtr->getType())->getElementType(); + Type *VTy2 = JPtr->getType()->getPointerElementType(); if (VTy != VTy2 && Offset < 0) { int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); OffsetInElmts = Offset/VTy2TSS; @@ -2231,11 +2231,12 @@ namespace { // The pointer value is taken to be the one with the lowest offset. Value *VPtr = IPtr; - Type *ArgTypeI = cast(IPtr->getType())->getElementType(); - Type *ArgTypeJ = cast(JPtr->getType())->getElementType(); + Type *ArgTypeI = IPtr->getType()->getPointerElementType(); + Type *ArgTypeJ = JPtr->getType()->getPointerElementType(); Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - Type *VArgPtrType = PointerType::get(VArgType, - cast(IPtr->getType())->getAddressSpace()); + Type *VArgPtrType + = PointerType::get(VArgType, + IPtr->getType()->getPointerAddressSpace()); return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), /* insert before */ I); } @@ -2244,7 +2245,7 @@ namespace { unsigned MaskOffset, unsigned NumInElem, unsigned NumInElem1, unsigned IdxOffset, std::vector &Mask) { - unsigned NumElem1 = cast(J->getType())->getNumElements(); + unsigned NumElem1 = J->getType()->getVectorNumElements(); for (unsigned v = 0; v < NumElem1; ++v) { int m = cast(J)->getMaskValue(v); if (m < 0) { @@ -2271,18 +2272,18 @@ namespace { Type *ArgTypeJ = J->getType(); Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - unsigned NumElemI = cast(ArgTypeI)->getNumElements(); + unsigned NumElemI = ArgTypeI->getVectorNumElements(); // Get the total number of elements in the fused vector type. // By definition, this must equal the number of elements in // the final mask. 
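// The refactoring pattern r193109 applies throughout, shown in isolation (a
// sketch for illustration, not part of the patch): the Type helpers replace
// the explicit casts without changing behaviour.
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
static void typeHelperEquivalents(llvm::Value *Ptr) {
  llvm::Type *T = Ptr->getType();
  // Previously: cast<PointerType>(T)->getElementType()
  llvm::Type *EltTy = T->getPointerElementType();
  // Previously: cast<PointerType>(T)->getAddressSpace()
  unsigned AS = T->getPointerAddressSpace();
  // Likewise, cast<VectorType>(VT)->getNumElements() becomes
  // VT->getVectorNumElements().
  (void)EltTy;
  (void)AS;
}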
- unsigned NumElem = cast(VArgType)->getNumElements(); + unsigned NumElem = VArgType->getVectorNumElements(); std::vector Mask(NumElem); Type *OpTypeI = I->getOperand(0)->getType(); - unsigned NumInElemI = cast(OpTypeI)->getNumElements(); + unsigned NumInElemI = OpTypeI->getVectorNumElements(); Type *OpTypeJ = J->getOperand(0)->getType(); - unsigned NumInElemJ = cast(OpTypeJ)->getNumElements(); + unsigned NumInElemJ = OpTypeJ->getVectorNumElements(); // The fused vector will be: // ----------------------------------------------------- @@ -2427,11 +2428,12 @@ namespace { if (CanUseInputs) { unsigned LOpElem = - cast(cast(LOp)->getOperand(0)->getType()) - ->getNumElements(); + cast(LOp)->getOperand(0)->getType() + ->getVectorNumElements(); + unsigned HOpElem = - cast(cast(HOp)->getOperand(0)->getType()) - ->getNumElements(); + cast(HOp)->getOperand(0)->getType() + ->getVectorNumElements(); // We have one or two input vectors. We need to map each index of the // operands to the index of the original vector. @@ -2647,14 +2649,14 @@ namespace { getReplacementName(IBeforeJ ? I : J, true, o, 1)); } - + NHOp->insertBefore(IBeforeJ ? J : I); HOp = NHOp; } } if (ArgType->isVectorTy()) { - unsigned numElem = cast(VArgType)->getNumElements(); + unsigned numElem = VArgType->getVectorNumElements(); std::vector Mask(numElem); for (unsigned v = 0; v < numElem; ++v) { unsigned Idx = v; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 11fd45e..72ddb74 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1069,7 +1069,7 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); // Make sure that the pointer does not point to structs. - if (cast(Ptr->getType())->getElementType()->isAggregateType()) + if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; // If this value is a pointer induction variable we know it is consecutive. -- cgit v1.1 From f2089e1dd810236f716c1a45fa1e561dce5074e8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 21 Oct 2013 20:07:30 +0000 Subject: SimplifyCFG: Don't duplicate calls to functions marked noduplicate v2 v2: - Use CI->cannotDuplicate() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193115 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 0833286..1398697 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1561,6 +1561,19 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return true; } +/// \returns True if this block contains a CallInst with the NoDuplicate +/// attribute. +static bool HasNoDuplicateCall(const BasicBlock *BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + const CallInst *CI = dyn_cast(I); + if (!CI) + continue; + if (CI->cannotDuplicate()) + return true; + } + return false; +} + /// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch /// across this block. 
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { @@ -1608,6 +1621,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; + if (HasNoDuplicateCall(BB)) return false; + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { -- cgit v1.1 From d114b41e62d8b784a9a48644d72489eba37c8a31 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 21 Oct 2013 23:14:06 +0000 Subject: Fix spelling, grammar, and match naming convention for test files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/DeadArgumentElimination.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 8621f1a..911c14e 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -359,13 +359,13 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) // If a function seen at compile time is not necessarily the one linked to // the binary being built, it is illegal to change the actual arguments - // passing to it. These functions can be captured by isWeakForLinker(). + // passed to it. These functions can be captured by isWeakForLinker(). // *NOTE* that mayBeOverridden() is insufficient for this purpose as it - // dosen't include linkage types like AvailableExternallyLinkage and + // doesn't include linkage types like AvailableExternallyLinkage and // LinkOnceODRLinkage. Take link_odr* as an example, it indicates a set of // *EQUIVALENT* globals that can be merged at link-time. However, the // semantic of *EQUIVALENT*-functions includes parameters. Changing - // parameters breaks the assumption. + // parameters breaks this assumption. // if (Fn.isWeakForLinker()) return false; -- cgit v1.1 From 8e3851a6eb9fe5fc30094c3a00d2b89c7cd68cbd Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Wed, 23 Oct 2013 17:28:19 +0000 Subject: Use address-taken to disambiguate global variable and indirect memops. Major steps include: 1). Introduces a not-addr-taken bit-field in GlobalVariable. 2). The GlobalOpt pass sets "not-address-taken" if it proves a global variable doesn't have its address taken. 3). AA uses this info for disambiguation; a sketch of the idea follows.
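The patch below adds only the producer side (GlobalOpt clearing the bit). How an alias analysis might consume it is sketched here with a hypothetical helper; the addressMaybeTaken() accessor is assumed to exist alongside the setter and is not shown in the patch:

  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Value.h"
  // Hypothetical consumer, not part of the patch: a global whose address is
  // never taken cannot be reached through an arbitrary pointer, so a memory
  // access through an unrelated pointer cannot alias it.
  static bool indirectOpMayAliasGlobal(const llvm::GlobalVariable *GV,
                                       const llvm::Value *Ptr) {
    if (!GV->addressMaybeTaken() /* assumed accessor */ &&
        Ptr->stripPointerCasts() != GV)
      return false; // provably no-alias
    return true;    // conservatively may-alias
  }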
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 82a59ed..a259b4d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1723,6 +1723,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (GlobalStatus::analyzeGlobal(GV, GS)) return false; + GV->setAddressMaybeTaken(false); if (!GS.IsCompared && !GV->hasUnnamedAddr()) { GV->setUnnamedAddr(true); NumUnnamed++; -- cgit v1.1 From d7da59004fc9262f33c96ad2736b36ff1235ce7b Mon Sep 17 00:00:00 2001 From: Yuchen Wu Date: Wed, 23 Oct 2013 20:35:00 +0000 Subject: Fixed comment typo in GCOVProfiling.cpp git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193268 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index f03b503..4e9afee 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -598,7 +598,7 @@ bool GCOVProfiler::emitProfileArcs() { }; FTy = FunctionType::get(Builder.getVoidTy(), Params, false); - // Inialize the environment and register the local writeout and flush + // Initialize the environment and register the local writeout and flush // functions. Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); Builder.CreateCall2(GCOVInit, WriteoutF, FlushF); -- cgit v1.1 From 577ac566c45670cd8ef03e202cb92258c643574d Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 24 Oct 2013 00:43:38 +0000 Subject: Clarify comments in genLoopLimit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 9c092e6..7785855 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1506,9 +1506,10 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). // // Valid Cases: (1) both integers is most common; (2) both may be pointers - // for simple memset-style loops; (3) IVInit is an integer and IVCount is a - // pointer may occur when enable-iv-rewrite generates a canonical IV on top - // of case #2. + // for simple memset-style loops. + // + // IVInit integer and IVCount pointer would only occur if a canonical IV + // were generated on top of case #2, which is not expected. const SCEV *IVLimit = 0; // For unit stride, IVCount = Start + BECount with 2's complement overflow. -- cgit v1.1 From d084153a8fd8ee058fd4c39d4f15d3f988566d95 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Thu, 24 Oct 2013 05:29:56 +0000 Subject: Fix a bug in LinearFunctionTestReplace that created invalid loop exit checks. 
Reviewed by Andy git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193303 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 7785855..cfd8db0 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1479,8 +1479,14 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, if (IndVar->getType()->isPointerTy() && !IVCount->getType()->isPointerTy()) { + // IVOffset will be the new GEP offset that is interpreted by GEP as a + // signed value. IVCount on the other hand represents the loop trip count, + // which is an unsigned value. FindLoopCounter only allows induction + // variables that have a positive unit stride of one. This means we don't + // have to handle the case of negative offsets (yet) and just need to zero + // extend IVCount. Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); - const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy); + const SCEV *IVOffset = SE->getTruncateOrZeroExtend(IVCount, OfsTy); // Expand the code for the iteration count. assert(SE->isLoopInvariant(IVOffset, L) && -- cgit v1.1 From 5e1d0d39db5fefe013f58c124a94694f96bce2f1 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Thu, 24 Oct 2013 09:17:24 +0000 Subject: fix PR17635: false positive with packed structures LLVM optimizers may widen accesses to packed structures that overflow the structure itself, but should be in bounds up to the alignment of the object git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193317 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/BoundsChecking.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index 4a9e950..7a9f0f6 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -172,7 +172,8 @@ bool BoundsChecking::runOnFunction(Function &F) { TrapBB = 0; BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); Builder = &TheBuilder; - ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext()); + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext(), + /*RoundToAlign=*/true); ObjSizeEval = &TheObjSizeEval; // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory -- cgit v1.1 From d6aa89eca5be76dece4b4cd44359e8577e587dff Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Thu, 24 Oct 2013 14:50:51 +0000 Subject: Mark vector loops as already vectorized Make sure we mark all loops (scalar and vector) when vectorizing, so that we don't try to vectorize them anymore. Also, set unroll to 1, since this is what we check for on early exit. 
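The early exit this relies on has roughly the following shape (a sketch, assuming the vectorizer reads its hints into Width and Unroll fields as the diff below suggests):

  // Width 1 and unroll 1 is the marker setAlreadyVectorized() writes, and
  // also what an explicit "leave this loop alone" annotation produces, so
  // such loops are skipped before any further analysis.
  if (Hints.Width == 1 && Hints.Unroll == 1)
    return false;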
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193349 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 72ddb74..317c1ff 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -801,6 +801,7 @@ struct LoopVectorizeHints { Vals.push_back(LoopID->getOperand(i)); Vals.push_back(createHint(Context, Twine(Prefix(), "width").str(), Width)); + Vals.push_back(createHint(Context, Twine(Prefix(), "unroll").str(), 1)); MDNode *NewLoopID = MDNode::get(Context, Vals); // Set operand 0 to refer to the loop id itself. @@ -1785,6 +1786,9 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { LoopExitBlock = ExitBlock; LoopVectorBody = VecBody; LoopScalarBody = OldBasicBlock; + + LoopVectorizeHints Hints(Lp, true); + Hints.setAlreadyVectorized(Lp); } /// This function returns the identity element (or neutral element) for -- cgit v1.1 From 4a6b6eea2d7c1a0fa8e3ee23e1fa73f0307d1115 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 24 Oct 2013 16:38:33 +0000 Subject: Inliner: Handle readonly attribute per argument when adding memcpy Patch by: Vincent Lejeune git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index dabb67b..585658a 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -337,33 +337,35 @@ static void UpdateCallGraphAfterInlining(CallSite CS, /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. -static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, +static Value *HandleByValArgument(Value *PassedValue, + const Argument *ArgumentSignature, + Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast(Arg->getType())->getElementType(); + Type *AggTy = cast(PassedValue->getType())->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. - if (CalledFunc->onlyReadsMemory()) { + if (CalledFunc->onlyReadsMemory() || ArgumentSignature->onlyReadsMemory()) { // If the byval argument has a specified alignment that is greater than the // passed in pointer, then we either have to round up the input pointer or // give up on this transformation. if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. - return Arg; + return PassedValue; // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, + if (getOrEnforceKnownAlignment(PassedValue, ByValAlignment, IFI.TD) >= ByValAlignment) - return Arg; + return PassedValue; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. 
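// (Illustration of what the new per-argument check buys, with a hypothetical
// callee: given
//   define i32 @sum(%struct.S* byval readonly %s) { ... }
// the readonly attribute on the argument alone now proves the caller's copy
// is never written, so the inliner can reuse the original object directly;
// previously the memcpy was elided only when the whole function was
// readonly/readnone.)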
} - LLVMContext &Context = Arg->getContext(); + LLVMContext &Context = PassedValue->getContext(); Type *VoidPtrTy = Type::getInt8PtrTy(Context); @@ -379,7 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, PassedValue->getName(), &*Caller->begin()->begin()); // Emit a memcpy. Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; @@ -387,7 +389,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Intrinsic::memcpy, Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(PassedValue, VoidPtrTy, "tmp", TheCall); Value *Size; if (IFI.TD == 0) @@ -588,13 +590,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; + const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear -- cgit v1.1 From 006183a9364660daba786d352df720e079412d60 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Fri, 25 Oct 2013 20:40:15 +0000 Subject: LoopVectorizer: Don't attempt to vectorize extractelement instructions The loop vectorizer does not currently understand how to vectorize extractelement instructions. The existing check, which excluded all vector-valued instructions, did not catch extractelement instructions because it checked only the return value. As a result, vectorization would proceed, producing illegal instructions like this: %58 = extractelement <2 x i32> %15, i32 0 %59 = extractelement i32 %58, i32 0 where the second extractelement is illegal because its first operand is not a vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193434 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 317c1ff..8b5424f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2965,8 +2965,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } // Check that the instruction return type is vectorizable. - if (!VectorType::isValidElementType(it->getType()) && - !it->getType()->isVoidTy()) { + // Also, we can't vectorize extractelement instructions. + if ((!VectorType::isValidElementType(it->getType()) && + !it->getType()->isVoidTy()) || isa(it)) { DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; } -- cgit v1.1 From 0f978ea45945bf3b8f2ce20f46d78dc840894623 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 25 Oct 2013 21:29:52 +0000 Subject: Handle calls and invokes in GlobalStatus. 
This patch teaches GlobalStatus to analyze a call that uses the global value as a callee, not as an argument. With this change internalize can handle the common use of linkonce_odr functions. This reduces the number of linkonce_odr functions in an LTO build of clang (checked with the emit-llvm gold plugin option) from 1730 to 60.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193436 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Utils/GlobalStatus.cpp | 5 +++++
1 file changed, 5 insertions(+)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 8fb79aa..5f0a563 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
@@ -148,6 +149,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
if (MSI->isVolatile())
return true;
GS.StoredType = GlobalStatus::Stored;
+ } else if (ImmutableCallSite C = I) {
+ if (!C.isCallee(UI))
+ return true;
+ GS.IsLoaded = true;
} else {
return true; // Any other non-load instruction might take address!
}
-- cgit v1.1
From 4d4bbaf997c16f9e79503bd640306d784efd090e Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 25 Oct 2013 21:35:56 +0000
Subject: Fix SCEVExpander: don't try to expand quadratic recurrences outside a loop.
Partial fix for PR17459: wrong code at -O3 on x86_64-linux-gnu (affecting trunk and 3.3)
When SCEV expands a recurrence outside of a loop it attempts to scale by the stride of the recurrence. Chained recurrences don't work that way. We could compute binomial coefficients, but would have to guarantee that the chained AddRecs are in a perfectly reduced form.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193438 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Scalar/IndVarSimplify.cpp | 3 ++-
lib/Transforms/Scalar/LoopStrengthReduce.cpp | 21 +++++++++++++++++++--
2 files changed, 21 insertions(+), 3 deletions(-)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index cfd8db0..235aaaa 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -532,7 +532,8 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible.
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
- if (!SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue))
+ if (!SE->isLoopInvariant(ExitValue, L) ||
+ !isSafeToExpand(ExitValue, *SE))
continue;
// Computing the value outside of the loop brings no benefit if :
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 14cb979..eff5268 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1170,6 +1170,13 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded.
This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
+ bool RigidFormula;
+
/// WidestFixupType - This records the widest use type for any fixup using
/// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
/// max fixup widths to be equivalent, because the narrower one may be relying
@@ -1188,6 +1195,7 @@ public:
MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
+ RigidFormula(false),
WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
@@ -1214,6 +1222,9 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
+ if (!Formulae.empty() && RigidFormula)
+ return false;
+
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -1433,7 +1444,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
}
case LSRUse::ICmpZero:
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
+ // Therefore, return 0 in case F.Scale == -1.
return F.Scale != -1;
case LSRUse::Basic:
@@ -2943,7 +2954,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
@@ -2986,6 +2997,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
/// and loop-computable portions.
void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
+ // Mark uses whose expressions cannot be expanded.
+ if (!isSafeToExpand(S, SE))
+ LU.RigidFormula = true;
+
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -4353,6 +4368,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
+ if (LU.RigidFormula)
+ return LF.OperandValToReplace;
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
-- cgit v1.1
From 887f9c5ec15582aec34aa6c28955d01e4e9961e2 Mon Sep 17 00:00:00 2001 From: Wan Xiaofei Date: Sat, 26 Oct 2013 03:08:02 +0000
Subject: Quick look-up for block in loop.
This patch implements quick look-up of blocks in a loop by maintaining a hash set for blocks. It improves the efficiency of loop analysis considerably; the biggest improvement is 5-6% (458.sjeng). Below are the compilation times for our benchmarks in llc before and after the patch.
Benchmark     llc - trunk          llc - patched
401.bzip2     0.339081   100.00%   0.329657   102.86%
403.gcc      19.853966   100.00%  19.605466   101.27%
429.mcf       0.049823   100.00%   0.048451   102.83%
433.milc      0.514898   100.00%   0.510217   100.92%
444.namd      1.109328   100.00%   1.103481   100.53%
445.gobmk     4.988028   100.00%   4.929114   101.20%
456.hmmer     0.843871   100.00%   0.825865   102.18%
458.sjeng     0.754238   100.00%   0.714095   105.62%
464.h264ref   2.9668     100.00%   2.90612    102.09%
471.omnetpp   4.556533   100.00%   4.511886   100.99%
bitmnp01      0.038168   100.00%   0.0357     106.91%
idctrn01      0.037745   100.00%   0.037332   101.11%
libquake2     3.78689    100.00%   3.76209    100.66%
libquake_     2.251525   100.00%   2.234104   100.78%
linpack       0.033159   100.00%   0.032788   101.13%
matrix01      0.045319   100.00%   0.043497   104.19%
nbench        0.333161   100.00%   0.329799   101.02%
tblook01      0.017863   100.00%   0.017666   101.12%
ttsprk01      0.054337   100.00%   0.053057   102.41%
Reviewer: Andrew Trick, Hal Finkel
Approver: Andrew Trick
Test: Pass make check-all & llvm test-suite
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193460 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Utils/LCSSA.cpp | 15 ++-------------
lib/Transforms/Vectorize/LoopVectorize.cpp | 11 ++++++-----
2 files changed, 8 insertions(+), 18 deletions(-)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 2d1b166..f15e8d5 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -55,7 +55,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
Loop *L;
@@ -82,11 +81,6 @@ namespace {
// Check the special guarantees that LCSSA makes.
assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
}
-
- /// inLoop - returns true if the given block is within the current loop
- bool inLoop(BasicBlock *B) const {
- return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
- }
};
}
@@ -129,11 +123,6 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
if (ExitBlocks.empty())
return false;
- // Speed up queries by creating a sorted vector of blocks.
- LoopBlocks.clear();
- LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
- array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
-
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, rewrite those uses.
bool MadeChange = false;
@@ -198,7 +187,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
- if (InstBB != UserBB && !inLoop(UserBB))
+ if (InstBB != UserBB && !L->contains(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
@@ -244,7 +233,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
// If the exit block has a predecessor not within the loop, arrange for
// the incoming value use corresponding to that predecessor to be
// rewritten in terms of a different LCSSA PHI.
- if (!inLoop(*PI)) + if (!L->contains(*PI)) UsesToRewrite.push_back( &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 8b5424f..507f67f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2694,14 +2694,14 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { return false; assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); - std::vector &LoopBlocks = TheLoop->getBlocksVector(); // A list of pointers that we can safely read and write to. SmallPtrSet SafePointes; // Collect safe addresses. - for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { - BasicBlock *BB = LoopBlocks[i]; + for (Loop::block_iterator BI = TheLoop->block_begin(), + BE = TheLoop->block_end(); BI != BE; ++BI) { + BasicBlock *BB = *BI; if (blockNeedsPredication(BB)) continue; @@ -2715,8 +2715,9 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } // Collect the blocks that need predication. - for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) { - BasicBlock *BB = LoopBlocks[i]; + for (Loop::block_iterator BI = TheLoop->block_begin(), + BE = TheLoop->block_end(); BI != BE; ++BI) { + BasicBlock *BB = *BI; // We don't support switch statements inside loops. if (!isa(BB->getTerminator())) -- cgit v1.1 From 69bd41dfe33f24414be281ba5e2204b7348c33ae Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Sun, 27 Oct 2013 03:08:44 +0000 Subject: Revert r193251 : Use address-taken to disambiguate global variable and indirect memops. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193489 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index a259b4d..82a59ed 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1723,7 +1723,6 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, if (GlobalStatus::analyzeGlobal(GV, GS)) return false; - GV->setAddressMaybeTaken(false); if (!GS.IsCompared && !GV->hasUnnamedAddr()) { GV->setUnnamedAddr(true); NumUnnamed++; -- cgit v1.1 From 4a6b3a9a770ec2064fb5975ff2d57419c1339a21 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 29 Oct 2013 01:33:50 +0000 Subject: SLPVectorizer: Use vector type for vectorized memory operations No test case, because with the current cost model we don't see a difference. An upcoming ARM memory cost model change will expose and test this bug. radar://15332579 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4d82bc4..012521a 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1023,14 +1023,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // Cost of wide load - cost of scalar loads. 
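// (Worked example with illustrative costs, not a real target's numbers: for
// a <4 x float> bundle where one scalar load costs 1 and one vector load
// costs 2, the intended entry cost is 2 - 4*1 = -2. Querying the wide load
// with ScalarTy instead of VecTy priced it at 1 - 4*1 = -3, overstating the
// benefit; targets that price scalar and vector loads identically hid the
// bug, as the commit message notes.)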
int ScalarLdCost = VecTy->getNumElements() * TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); - int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); + int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0); return VecLdCost - ScalarLdCost; } case Instruction::Store: { // We know that we can merge the stores. Calculate the cost. int ScalarStCost = VecTy->getNumElements() * TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); - int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); + int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0); return VecStCost - ScalarStCost; } default: -- cgit v1.1 From 7e8cebf22d170769b0bf0c2a69309faa0e36ac4c Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Tue, 29 Oct 2013 01:33:53 +0000 Subject: ARM cost model: Account for zero cost scalar SROA instructions By vectorizing a series of srl, or, ... instructions we have obfuscated the intention so much that the backend does not know how to fold this code away. radar://15336950 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193573 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 012521a..2797a21 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1013,9 +1013,24 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); } else { - ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy); - VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); + // Certain instructions can be cheaper to vectorize if they have a + // constant second vector operand. + TargetTransformInfo::OperandValueKind Op1VK = + TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Op2VK = + TargetTransformInfo::OK_UniformConstantValue; + + // Check whether all second operands are constant. + for (unsigned i = 0; i < VL.size(); ++i) + if (!isa(cast(VL[i])->getOperand(1))) { + Op2VK = TargetTransformInfo::OK_AnyValue; + break; + } + + ScalarCost = + VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK); + VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK); } return VecCost - ScalarCost; } -- cgit v1.1 From b7ff48e3744a3a9800c65afddfc004977b8102b5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 30 Oct 2013 19:05:41 +0000 Subject: Fix GVN creating bitcast between address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193710 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index aa4e185..731a6d0 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1088,14 +1088,15 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (Offset == -1) return Offset; + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. 
Src = ConstantExpr::getBitCast(Src, - llvm::Type::getInt8PtrTy(Src->getContext())); + Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, &TD)) return Offset; return -1; @@ -1247,15 +1248,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, // Otherwise, this is a memcpy/memmove from a constant global. MemTransferInst *MTI = cast(SrcInst); Constant *Src = cast(MTI->getSource()); + unsigned AS = Src->getType()->getPointerAddressSpace(); // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, - llvm::Type::getInt8PtrTy(Src->getContext())); + Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = - ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); - Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, &TD); } -- cgit v1.1 From 9effcbb879728f8a1e0e86c9cb777f11a9a3be7d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 30 Oct 2013 22:54:58 +0000 Subject: Teach scalarrepl about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193720 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 33bbe15..57b290e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -963,7 +963,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getType())); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { -- cgit v1.1 From c143c7573bfd0d55cf283cc2676dbd852f939c87 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 31 Oct 2013 03:03:55 +0000 Subject: Merge CallGraph and BasicCallGraph. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193734 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +- lib/Transforms/IPO/FunctionAttrs.cpp | 2 +- lib/Transforms/IPO/InlineAlways.cpp | 2 +- lib/Transforms/IPO/InlineSimple.cpp | 2 +- lib/Transforms/IPO/PruneEH.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index c42d506..65c57e1 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -88,7 +88,7 @@ char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 386cb71..60e5f06 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -137,7 +137,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index a0095da..437597e 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -63,7 +63,7 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index a4f7026..9e607ab 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -61,7 +61,7 @@ public: char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 89529de..b160913 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -51,7 +51,7 @@ namespace { char PruneEH::ID = 0; INITIALIZE_PASS_BEGIN(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_PASS_END(PruneEH, "prune-eh", "Remove unused exception handling info", false, false) -- cgit v1.1 From 7e667c56cf7e27ff521ceb86518beab32bfb630d Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 31 Oct 2013 20:51:58 +0000 Subject: Use LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN instead of the "dso list". 
There are two ways one could implement hiding of linkonce_odr symbols in LTO: * LLVM tells the linker which symbols can be hidden if not used from native files. * The linker tells LLVM which symbols are not used from other object files, but will be put in the dso symbol table if present. GOLD's API is the second option. It was implemented almost 1:1 in llvm by passing the list down to internalize. LLVM already had partial support for the first option. It is also very similar to how ld64 handles hiding these symbols when *not* doing LTO. This patch then * removes the APIs for the DSO list. * marks LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN all linkonce_odr unnamed_addr global values and other linkonce_odr whose address is not used. * makes the gold plugin responsible for handling the API mismatch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193800 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/IPO.cpp | 2 +- lib/Transforms/IPO/Internalize.cpp | 64 +++++++------------------------ lib/Transforms/IPO/PassManagerBuilder.cpp | 2 +- 3 files changed, 15 insertions(+), 53 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 5f26bac..5d563d8 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -98,7 +98,7 @@ void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) { std::vector Export; if (AllButMain) Export.push_back("main"); - unwrap(PM)->add(createInternalizePass(Export, None)); + unwrap(PM)->add(createInternalizePass(Export)); } void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index e615918..64e2ced 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -11,18 +11,11 @@ // If the function or variable is not in the list of external names given to // the pass it is marked as internal. // -// This transformation would not be legal or profitable in a regular -// compilation, but it gets extra information from the linker about what is safe -// or profitable. +// This transformation would not be legal in a regular compilation, but it gets +// extra information from the linker about what is safe. // -// As an example of a normally illegal transformation: Internalizing a function -// with external linkage. Only if we are told it is only used from within this -// module, it is safe to do it. -// -// On the profitability side: It is always legal to internalize a linkonce_odr -// whose address is not used. Doing so normally would introduce code bloat, but -// if we are told by the linker that the only use of this would be for a -// DSO symbol table, it is profitable to hide it. +// For example: Internalizing a function with external linkage. Only if we are +// told it is only used from within this module, it is safe to do it. 
// //===----------------------------------------------------------------------===// @@ -58,20 +51,13 @@ APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); -static cl::list -DSOList("internalize-dso-list", cl::value_desc("list"), - cl::desc("A list of symbol names need for a dso symbol table"), - cl::CommaSeparated); - namespace { class InternalizePass : public ModulePass { std::set ExternalNames; - std::set DSONames; public: static char ID; // Pass identification, replacement for typeid explicit InternalizePass(); - explicit InternalizePass(ArrayRef ExportList, - ArrayRef DSOList); + explicit InternalizePass(ArrayRef ExportList); void LoadFile(const char *Filename); virtual bool runOnModule(Module &M); @@ -92,21 +78,15 @@ InternalizePass::InternalizePass() if (!APIFile.empty()) // If a filename is specified, use it. LoadFile(APIFile.c_str()); ExternalNames.insert(APIList.begin(), APIList.end()); - DSONames.insert(DSOList.begin(), DSOList.end()); } -InternalizePass::InternalizePass(ArrayRef ExportList, - ArrayRef DSOList) +InternalizePass::InternalizePass(ArrayRef ExportList) : ModulePass(ID){ initializeInternalizePassPass(*PassRegistry::getPassRegistry()); for(ArrayRef::const_iterator itr = ExportList.begin(); itr != ExportList.end(); itr++) { ExternalNames.insert(*itr); } - for(ArrayRef::const_iterator itr = DSOList.begin(); - itr != DSOList.end(); itr++) { - DSONames.insert(*itr); - } } void InternalizePass::LoadFile(const char *Filename) { @@ -126,8 +106,7 @@ void InternalizePass::LoadFile(const char *Filename) { } static bool shouldInternalize(const GlobalValue &GV, - const std::set &ExternalNames, - const std::set &DSONames) { + const std::set &ExternalNames) { // Function must be defined here if (GV.isDeclaration()) return false; @@ -144,23 +123,7 @@ static bool shouldInternalize(const GlobalValue &GV, if (ExternalNames.count(GV.getName())) return false; - // Not needed for the symbol table? - if (!DSONames.count(GV.getName())) - return true; - - // Not a linkonce. Someone can depend on it being on the symbol table. - if (!GV.hasLinkOnceLinkage()) - return false; - - // The address is not important, we can hide it. - if (GV.hasUnnamedAddr()) - return true; - - GlobalStatus GS; - if (GlobalStatus::analyzeGlobal(&GV, GS)) - return false; - - return !GS.IsCompared; + return true; } bool InternalizePass::runOnModule(Module &M) { @@ -189,7 +152,7 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all functions not in the api as internal. // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames, DSONames)) + if (!shouldInternalize(*I, ExternalNames)) continue; I->setLinkage(GlobalValue::InternalLinkage); @@ -226,7 +189,7 @@ bool InternalizePass::runOnModule(Module &M) { // FIXME: maybe use private linkage? for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames, DSONames)) + if (!shouldInternalize(*I, ExternalNames)) continue; I->setLinkage(GlobalValue::InternalLinkage); @@ -238,7 +201,7 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all aliases that are not in the api as internal as well. 
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) {
- if (!shouldInternalize(*I, ExternalNames, DSONames))
+ if (!shouldInternalize(*I, ExternalNames))
continue;
I->setLinkage(GlobalValue::InternalLinkage);
@@ -254,7 +217,6 @@
ModulePass *llvm::createInternalizePass() {
return new InternalizePass();
}
-ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList,
- ArrayRef<const char *> DSOList) {
- return new InternalizePass(ExportList, DSOList);
+ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList) {
+ return new InternalizePass(ExportList);
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 0017c1b..1386201 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -277,7 +277,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// for a main function. If main is defined, mark all other functions
// internal.
if (Internalize)
- PM.add(createInternalizePass("main", None));
+ PM.add(createInternalizePass("main"));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
-- cgit v1.1
From ef34496b3fc197fe03da6fd86214d5e9b37d4368 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 31 Oct 2013 21:56:03 +0000
Subject: Do not convert "call asm" to "invoke asm" in Inliner.
Given that the backend does not handle "invoke asm" correctly ("invoke asm" will be handled by SelectionDAGBuilder::visitInlineAsm, which does not have the right setup for LPadToCallSiteMap) and we already made the assumption that inline asm does not throw in InstCombiner::visitCallSite, we are going to make the same assumption in Inliner to make sure we don't convert "call asm" to "invoke asm".
If it becomes necessary to add support for "invoke asm" later on, we will need to modify the backend as well as remove the assumptions that inline asm does not throw.
Fix rdar://15317907
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193808 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Utils/InlineFunction.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 585658a..5642911 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -193,7 +193,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
CallInst *CI = dyn_cast<CallInst>(I);
// If this call cannot unwind, don't convert it to an invoke.
- if (!CI || CI->doesNotThrow())
+ // Inline asm calls cannot throw.
+ if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
continue;
// Convert this function call into an invoke instruction. First, split the
-- cgit v1.1
From d272a1223314a69e4678816feeff2cfb3e740f8f Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 1 Nov 2013 03:05:04 +0000
Subject: LoopVectorizer: Clear all member data structures in RuntimeCheck.reset()
Clear all data structures when resetting the RuntimeCheck data structure.
No test case. This was exposed by an upcoming change.
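A reduced sketch of the invariant involved; the member names mirror the patch, everything else is simplified. The checker keeps one entry per pointer spread across parallel vectors, so a reset() that clears only some of them leaves later insertions misaligned:

#include <vector>

struct RuntimePointerCheckSketch {
  // Parallel arrays: entry i of each vector describes the same pointer.
  std::vector<const void *> Pointers;
  std::vector<long> Starts, Ends;
  std::vector<bool> IsWritePtr;
  std::vector<unsigned> DependencySetId;

  void reset() {
    Pointers.clear();
    Starts.clear();
    Ends.clear();
    IsWritePtr.clear();      // the two clears this commit adds
    DependencySetId.clear();
  }
};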
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 507f67f..e972326 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -493,6 +493,8 @@ public: Pointers.clear(); Starts.clear(); Ends.clear(); + IsWritePtr.clear(); + DependencySetId.clear(); } /// Insert a pointer and calculate the start and end SCEVs. -- cgit v1.1 From 0097e155025767c11790912dcf780f82dffaffb1 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 1 Nov 2013 03:05:07 +0000 Subject: LoopVectorizer: If dependency checks fail try runtime checks When a dependence check fails we can still try to vectorize loops with runtime array bounds checks. This helps linpack to vectorize a loop in dgefa. And we are back to 2x of the scalar performance on a corei7-avx. radar://15339680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 52 +++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e972326..f18707c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3061,7 +3061,7 @@ public: /// non-intersection. bool canCheckPtrAtRT(LoopVectorizationLegality::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop); + Loop *TheLoop, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed /// and builds sets of dependent accesses. @@ -3075,6 +3075,7 @@ public: bool isRTCheckNeeded() { return IsRTCheckNeeded; } bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } + void resetDepChecks() { CheckDeps.clear(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -3129,10 +3130,15 @@ static bool hasComputableBounds(ScalarEvolution *SE, Value *Ptr) { return AR->isAffine(); } +/// \brief Check the stride of the pointer and ensure that it does not wrap in +/// the address space. +static int isStridedPtr(ScalarEvolution *SE, DataLayout *DL, Value *Ptr, + const Loop *Lp); + bool AccessAnalysis::canCheckPtrAtRT( LoopVectorizationLegality::RuntimePointerCheck &RtCheck, unsigned &NumComparisons, ScalarEvolution *SE, - Loop *TheLoop) { + Loop *TheLoop, bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. unsigned NumReadPtrChecks = 0; @@ -3160,7 +3166,10 @@ bool AccessAnalysis::canCheckPtrAtRT( else ++NumReadPtrChecks; - if (hasComputableBounds(SE, Ptr)) { + if (hasComputableBounds(SE, Ptr) && + // When we run after a failing dependency check we have to make sure we + // don't have wrapping pointers. + (!ShouldCheckStride || isStridedPtr(SE, DL, Ptr, TheLoop) == 1)) { // The id of the dependence set. 
unsigned DepId;
@@ -3342,8 +3351,9 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L) :
- SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0) {}
+ MemoryDepChecker(ScalarEvolution *Se, DataLayout *Dl, const Loop *L)
+ : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
+ ShouldRetryWithRuntimeCheck(false) {}
/// \brief Register the location (instructions are given increasing numbers)
/// of a write access.
@@ -3373,6 +3383,10 @@ public:
/// the accesses safely with.
unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
+ /// \brief In some cases when the dependency check fails we can still
+ /// vectorize the loop with a dynamic array access check.
+ bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
+
private:
ScalarEvolution *SE;
DataLayout *DL;
@@ -3390,6 +3404,10 @@ private:
// We can access this many bytes in parallel safely.
unsigned MaxSafeDepDistBytes;
+ /// \brief If we see a non constant dependence distance we can still try to
+ /// vectorize this loop with runtime checks.
+ bool ShouldRetryWithRuntimeCheck;
+
/// \brief Check whether there is a plausible dependence between the two
/// accesses.
///
@@ -3587,6 +3605,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
DEBUG(dbgs() << "LV: Dependence because of non constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
return true;
}
@@ -3876,6 +3895,29 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
CanVecMem = DepChecker.areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck());
MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LV: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks();
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, true);
+ // Check that we did not collect too many pointers or found an unsizeable
+ // pointer.
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ return false;
+ }
+
+ CanVecMem = true;
+ }
}
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
-- cgit v1.1
From 7208b0763c9fca2c6364c642d28fd014a496cc37 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 1 Nov 2013 14:09:50 +0000
Subject: LoopVectorize: Look for consecutive accesses in GEPs with trailing zero indices
If we have a pointer to a single-element struct we can still build wide loads and stores to it (if there is no padding).
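In source terms, the newly handled pattern looks roughly like the following (illustrative code, not taken from the patch):

struct Wrapped { float X; }; // single element, no padding

float sumWrapped(const Wrapped *A, int N) {
  float S = 0.0f;
  for (int I = 0; I != N; ++I)
    S += A[I].X; // address is gep %A, %I, 0: a trailing zero index
  return S;
}

The trailing zero index moves the pointer by nothing because the struct and its only field have the same allocation size, which is exactly the condition the new getGEPInductionOperand helper checks before peeling indices off the end.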
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193860 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 49 +++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f18707c..ee94173 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1069,6 +1069,31 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, return Builder.CreateAdd(Val, Cv, "induction"); } +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +static unsigned getGEPInductionOperand(DataLayout *DL, + const GetElementPtrInst *Gep) { + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL->getTypeAllocSize( + cast(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); // Make sure that the pointer does not point to structs. @@ -1090,8 +1115,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return 0; unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = Gep->getOperand(NumOperands - 1); - Value *GpPtr = Gep->getPointerOperand(); // If this GEP value is a consecutive pointer induction variable and all of // the indices are constant then we know it is consecutive. We can @@ -1115,14 +1138,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return -1; } - // Check that all of the gep indices are uniform except for the last. - for (unsigned i = 0; i < NumOperands - 1; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0; i != NumOperands; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) return 0; - // We can emit wide load/stores only if the last index is the induction - // variable. - const SCEV *Last = SE->getSCEV(LastIndex); + // We can emit wide load/stores only if the last non-zero index is the + // induction variable. + const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand)); if (const SCEVAddRecExpr *AR = dyn_cast(Last)) { const SCEV *Step = AR->getStepRecurrence(*SE); @@ -1219,7 +1246,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - unsigned LastOperand = NumOperands - 1; + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); // Create the new GEP with the new induction variable. 
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -1228,9 +1255,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
// Update last index or loop invariant instruction anchored in loop.
- if (i == LastOperand ||
+ if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
- assert((i == LastOperand ||
+ assert((i == InductionOperand ||
SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
"Must be last index or loop invariant");
-- cgit v1.1
From f4775827d046aa12f6aaffd5bd4746744e8fdff8 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 1 Nov 2013 22:18:19 +0000
Subject: LoopVectorizer: Perform redundancy elimination on induction variables
When the loop vectorizer was part of the SCC inliner pass manager, GVN would run after the loop vectorizer, followed by instcombine. This way redundancy (multiple uses) was removed and instcombine could perform scalarization on the induction variables. Having moved the loop vectorizer later, we no longer run any form of redundancy elimination before we perform instcombine. This caused vectorized induction variables to survive that did not before.
On a recent iMac this helps linpack back from 6000Mflops to 7000Mflops.
This should also help lpbench and paq8p.
I ran a Release (without Asserts) build over the test-suite and did not see any negative impact on compile time.
radar://15339680
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193891 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Vectorize/LoopVectorize.cpp | 35 +++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index ee94173..7f77784 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2272,8 +2272,41 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
(RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
-
+
fixLCSSAPHIs();
+
+ // Perform simple cse.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> ToRemove;
+ for (BasicBlock::iterator I = LoopVectorBody->begin(),
+ E = LoopVectorBody->end(); I != E; ++I) {
+ Instruction *In = I;
+
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
+ !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ for (SmallPtrSet<Instruction *, 4>::iterator v = Visited.begin(),
+ ve = Visited.end(); v != ve; ++v) {
+ if (In->isIdenticalTo(*v)) {
+ In->replaceAllUsesWith(*v);
+ ToRemove.push_back(In);
+ In = 0;
+ break;
+ }
+ }
+ if (In)
+ Visited.insert(In);
+
+ }
+ // Erase all of the instructions that we RAUWed.
+ for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
+ ve = ToRemove.end(); v != ve; ++v) {
+ assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
+ (*v)->eraseFromParent();
+ }
}
void InnerLoopVectorizer::fixLCSSAPHIs() {
-- cgit v1.1
From bc28e88a2861ab1183e138f19e92e5d862eaa8a6 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Fri, 1 Nov 2013 23:28:54 +0000
Subject: LoopVectorizer: Move cse code into its own function
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193895 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Vectorize/LoopVectorize.cpp | 69 ++++++++++++++++--------------
1 file changed, 37 insertions(+), 32 deletions(-)
(limited to 'lib/Transforms')
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7f77784..fe73cd9 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2055,6 +2055,41 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
return Select;
}
+///\brief Perform cse of induction variable instructions.
+static void cse(BasicBlock *BB) {
+ // Perform simple cse.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> ToRemove;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *In = I;
+
+ if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
+ !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
+ continue;
+
+ // Check if we can replace this instruction with any of the
+ // visited instructions.
+ for (SmallPtrSet<Instruction *, 4>::iterator v = Visited.begin(),
+ ve = Visited.end(); v != ve; ++v) {
+ if (In->isIdenticalTo(*v)) {
+ In->replaceAllUsesWith(*v);
+ ToRemove.push_back(In);
+ In = 0;
+ break;
+ }
+ }
+ if (In)
+ Visited.insert(In);
+
+ }
+ // Erase all of the instructions that we RAUWed.
+ for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
+ ve = ToRemove.end(); v != ve; ++v) {
+ assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
+ (*v)->eraseFromParent();
+ }
+}
+
void InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -2275,38 +2310,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
fixLCSSAPHIs();
- // Perform simple cse.
- SmallPtrSet<Instruction *, 4> Visited;
- SmallVector<Instruction *, 4> ToRemove;
- for (BasicBlock::iterator I = LoopVectorBody->begin(),
- E = LoopVectorBody->end(); I != E; ++I) {
- Instruction *In = I;
-
- if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In) &&
- !isa<ShuffleVectorInst>(In) && !isa<GetElementPtrInst>(In))
- continue;
-
- // Check if we can replace this instruction with any of the
- // visited instructions.
- for (SmallPtrSet<Instruction *, 4>::iterator v = Visited.begin(),
- ve = Visited.end(); v != ve; ++v) {
- if (In->isIdenticalTo(*v)) {
- In->replaceAllUsesWith(*v);
- ToRemove.push_back(In);
- In = 0;
- break;
- }
- }
- if (In)
- Visited.insert(In);
-
- }
- // Erase all of the instructions that we RAUWed.
- for (SmallVectorImpl<Instruction *>::iterator v = ToRemove.begin(),
- ve = ToRemove.end(); v != ve; ++v) {
- assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses");
- (*v)->eraseFromParent();
- }
+ // Remove redundant induction instructions.
+ cse(LoopVectorBody);
}
void InnerLoopVectorizer::fixLCSSAPHIs() {
-- cgit v1.1
From ff566d8f4492d7f32814656eaeca75635526d2db Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 2 Nov 2013 13:39:00 +0000
Subject: LoopVectorize: Remove quadratic behavior from the local CSE.
Doing this with a hash map doesn't change behavior and avoids calling isIdenticalTo O(n^2) times.
This should probably eventually move into a utility class shared with EarlyCSE and the limited CSE in the SLPVectorizer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193926 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 66 ++++++++++++++++++------------ 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index fe73cd9..6db7f68 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -48,6 +48,7 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -2055,38 +2056,51 @@ Value *createMinMaxOp(IRBuilder<> &Builder, return Select; } +namespace { +struct CSEDenseMapInfo { + static bool canHandle(Instruction *I) { + return isa(I) || isa(I) || + isa(I) || isa(I); + } + static inline Instruction *getEmptyKey() { + return DenseMapInfo::getEmptyKey(); + } + static inline Instruction *getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); + } + static unsigned getHashValue(Instruction *I) { + assert(canHandle(I) && "Unknown instruction!"); + return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(), + I->value_op_end())); + } + static bool isEqual(Instruction *LHS, Instruction *RHS) { + if (LHS == getEmptyKey() || RHS == getEmptyKey() || + LHS == getTombstoneKey() || RHS == getTombstoneKey()) + return LHS == RHS; + return LHS->isIdenticalTo(RHS); + } +}; +} + ///\brief Perform cse of induction variable instructions. static void cse(BasicBlock *BB) { // Perform simple cse. - SmallPtrSet Visited; - SmallVector ToRemove; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *In = I; + SmallDenseMap CSEMap; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *In = I++; - if (!isa(In) && !isa(In) && - !isa(In) && !isa(In)) - continue; + if (!CSEDenseMapInfo::canHandle(In)) + continue; - // Check if we can replace this instruction with any of the - // visited instructions. - for (SmallPtrSet::iterator v = Visited.begin(), - ve = Visited.end(); v != ve; ++v) { - if (In->isIdenticalTo(*v)) { - In->replaceAllUsesWith(*v); - ToRemove.push_back(In); - In = 0; - break; - } - } - if (In) - Visited.insert(In); + // Check if we can replace this instruction with any of the + // visited instructions. + if (Instruction *V = CSEMap.lookup(In)) { + In->replaceAllUsesWith(V); + In->eraseFromParent(); + continue; + } - } - // Erase all of the instructions that we RAUWed. - for (SmallVectorImpl::iterator v = ToRemove.begin(), - ve = ToRemove.end(); v != ve; ++v) { - assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses"); - (*v)->eraseFromParent(); + CSEMap[In] = In; } } -- cgit v1.1 From 9bbc7b4e49e26a1bfcc1ec503b5c7567258a743d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 2 Nov 2013 14:46:27 +0000 Subject: SLPVectorizer: Remove duplicated function. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193927 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2797a21..9a21653 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -206,14 +206,6 @@ static bool CanReuseExtract(ArrayRef VL) { return true; } -static bool all_equal(SmallVectorImpl &V) { - Value *First = V[0]; - for (int i = 1, e = V.size(); i != e; ++i) - if (V[i] != First) - return false; - return true; -} - static void reorderInputsAccordingToOpcode(ArrayRef VL, SmallVectorImpl &Left, SmallVectorImpl &Right) { @@ -301,8 +293,8 @@ static void reorderInputsAccordingToOpcode(ArrayRef VL, Right.push_back(V1); } - bool LeftBroadcast = all_equal(Left); - bool RightBroadcast = all_equal(Right); + bool LeftBroadcast = isSplat(Left); + bool RightBroadcast = isSplat(Right); // Don't reorder if the operands where good to begin with. if (!(LeftBroadcast || RightBroadcast) && -- cgit v1.1 From 208130f11331eccab26c0a6f3146cd1891e53e33 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Sun, 3 Nov 2013 06:48:38 +0000 Subject: Convert calls to __sinpi and __cospi into __sincospi_stret This adds an SimplifyLibCalls case which converts the special __sinpi and __cospi (float & double variants) into a __sincospi_stret where appropriate to remove duplicated work. Patch by Tim Northover git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 156 ++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index cbdd070..d838851 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -1252,6 +1253,155 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { } }; +struct SinCosPiOpt : public LibCallOptimization { + SinCosPiOpt() {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Make sure the prototype is as expected, otherwise the rest of the + // function is probably invalid and likely to abort. + if (!isTrigLibCall(CI)) + return 0; + + Value *Arg = CI->getArgOperand(0); + SmallVector SinCalls; + SmallVector CosCalls; + SmallVector SinCosCalls; + + bool IsFloat = Arg->getType()->isFloatTy(); + + // Look for all compatible sinpi, cospi and sincospi calls with the same + // argument. If there are enough (in some sense) we can make the + // substitution. + for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) + classifyArgUse(*UI, CI->getParent(), IsFloat, SinCalls, CosCalls, + SinCosCalls); + + // It's only worthwhile if both sinpi and cospi are actually used. 
+ if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) + return 0; + + Value *Sin, *Cos, *SinCos; + insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, + SinCos); + + replaceTrigInsts(SinCalls, Sin); + replaceTrigInsts(CosCalls, Cos); + replaceTrigInsts(SinCosCalls, SinCos); + + return 0; + } + + bool isTrigLibCall(CallInst *CI) { + Function *Callee = CI->getCalledFunction(); + FunctionType *FT = Callee->getFunctionType(); + + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. + bool AttributesSafe = CI->hasFnAttr(Attribute::NoUnwind) && + CI->hasFnAttr(Attribute::ReadNone); + + // Other than that we need float(float) or double(double) + return AttributesSafe && FT->getNumParams() == 1 && + FT->getReturnType() == FT->getParamType(0) && + (FT->getParamType(0)->isFloatTy() || + FT->getParamType(0)->isDoubleTy()); + } + + void classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, + SmallVectorImpl &SinCalls, + SmallVectorImpl &CosCalls, + SmallVectorImpl &SinCosCalls) { + CallInst *CI = dyn_cast(Val); + + if (!CI) + return; + + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + LibFunc::Func Func; + if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) + return; + + if (IsFloat) { + if (Func == LibFunc::sinpif) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospif) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stretf) + SinCosCalls.push_back(CI); + } else { + if (Func == LibFunc::sinpi) + SinCalls.push_back(CI); + else if (Func == LibFunc::cospi) + CosCalls.push_back(CI); + else if (Func == LibFunc::sincospi_stret) + SinCosCalls.push_back(CI); + } + } + + void replaceTrigInsts(SmallVectorImpl &Calls, Value *Res) { + for (SmallVectorImpl::iterator I = Calls.begin(), + E = Calls.end(); + I != E; ++I) { + LCS->replaceAllUsesWith(*I, Res); + } + } + + void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, + bool UseFloat, Value *&Sin, Value *&Cos, + Value *&SinCos) { + Type *ArgTy = Arg->getType(); + Type *ResTy; + StringRef Name; + + Triple T(OrigCallee->getParent()->getTargetTriple()); + if (UseFloat) { + Name = "__sincospi_stretf"; + + assert(T.getArch() != Triple::x86 && "x86 messy and unsupported for now"); + // x86_64 can't use {float, float} since that would be returned in both + // xmm0 and xmm1, which isn't what a real struct would do. + ResTy = T.getArch() == Triple::x86_64 + ? static_cast(VectorType::get(ArgTy, 2)) + : static_cast(StructType::get(ArgTy, ArgTy, NULL)); + } else { + Name = "__sincospi_stret"; + ResTy = StructType::get(ArgTy, ArgTy, NULL); + } + + Module *M = OrigCallee->getParent(); + Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(), + ResTy, ArgTy, NULL); + + if (Instruction *ArgInst = dyn_cast(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. + BasicBlock::iterator Loc = ArgInst; + B.SetInsertPoint(ArgInst->getParent(), ++Loc); + } else { + // Otherwise (e.g. for a constant) the beginning of the function is as + // good a place as any. 
+ BasicBlock &EntryBB = B.GetInsertBlock()->getParent()->getEntryBlock(); + B.SetInsertPoint(&EntryBB, EntryBB.begin()); + } + + SinCos = B.CreateCall(Callee, Arg, "sincospi"); + + if (SinCos->getType()->isStructTy()) { + Sin = B.CreateExtractValue(SinCos, 0, "sinpi"); + Cos = B.CreateExtractValue(SinCos, 1, "cospi"); + } else { + Sin = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 0), + "sinpi"); + Cos = B.CreateExtractElement(SinCos, ConstantInt::get(B.getInt32Ty(), 1), + "cospi"); + } + } + +}; + //===----------------------------------------------------------------------===// // Integer Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1764,6 +1914,7 @@ static MemSetOpt MemSet; // Math library call optimizations. static UnaryDoubleFPOpt UnaryDoubleFP(false); static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); +static SinCosPiOpt SinCosPi; // Integer library call optimizations. static FFSOpt FFS; @@ -1848,6 +1999,11 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::cos: case LibFunc::cosl: return &Cos; + case LibFunc::sinpif: + case LibFunc::sinpi: + case LibFunc::cospif: + case LibFunc::cospi: + return &SinCosPi; case LibFunc::powf: case LibFunc::pow: case LibFunc::powl: -- cgit v1.1 From 42864070b016976b87c64830395e96ae0e49279c Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 3 Nov 2013 11:09:39 +0000 Subject: Spell "Actual" correctly git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193954 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/InlineSimple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 9e607ab..57379a3 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -28,7 +28,7 @@ using namespace llvm; namespace { -/// \brief Actaul inliner pass implementation. +/// \brief Actual inliner pass implementation. /// /// The common implementation of the inlining logic is shared between this /// inliner pass and the always inliner pass. The two passes use different cost -- cgit v1.1 From 16d10987184281aff35c80542a3c02e7dcb7b59b Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 3 Nov 2013 12:22:13 +0000 Subject: Revert "Inliner: Handle readonly attribute per argument when adding memcpy" This reverts commit r193356, it caused PR17781. A reduced test case covering this regression has been added to the test suite. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193955 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/InlineFunction.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5642911..d021bce 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -338,35 +338,33 @@ static void UpdateCallGraphAfterInlining(CallSite CS, /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. 
-static Value *HandleByValArgument(Value *PassedValue, - const Argument *ArgumentSignature, - Instruction *TheCall, +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast(PassedValue->getType())->getElementType(); + Type *AggTy = cast(Arg->getType())->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. - if (CalledFunc->onlyReadsMemory() || ArgumentSignature->onlyReadsMemory()) { + if (CalledFunc->onlyReadsMemory()) { // If the byval argument has a specified alignment that is greater than the // passed in pointer, then we either have to round up the input pointer or // give up on this transformation. if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. - return PassedValue; + return Arg; // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(PassedValue, ByValAlignment, + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.TD) >= ByValAlignment) - return PassedValue; + return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } - LLVMContext &Context = PassedValue->getContext(); + LLVMContext &Context = Arg->getContext(); Type *VoidPtrTy = Type::getInt8PtrTy(Context); @@ -382,7 +380,7 @@ static Value *HandleByValArgument(Value *PassedValue, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, PassedValue->getName(), + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), &*Caller->begin()->begin()); // Emit a memcpy. Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; @@ -390,7 +388,7 @@ static Value *HandleByValArgument(Value *PassedValue, Intrinsic::memcpy, Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(PassedValue, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); Value *Size; if (IFI.TD == 0) @@ -591,14 +589,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; - const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { - ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear -- cgit v1.1 From 0c7ba3cef2d99bf15175303d5e2523fe898d009d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 3 Nov 2013 12:27:52 +0000 Subject: SLPVectorizer: When CSEing generated gathers only scan blocks containing them. Instead of doing a RPO traversal of the whole function remember the blocks containing gathers (typically <= 2) and scan them in dominator-first order. 
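A minimal sketch of the sorting ingredient, assuming DT is an LLVM DominatorTree (the comparator name DomFirst is invented; the diff below names its version DTCmp and initially compares with dominates()):

// Visit a block only after every block that dominates it. std::stable_sort
// needs a strict weak ordering, so the predicate must be irreflexive:
// properlyDominates(A, A) is false, whereas dominates(A, A) is true.
struct DomFirst {
  const DominatorTree *DT;
  explicit DomFirst(const DominatorTree *DT) : DT(DT) {}
  bool operator()(const BasicBlock *A, const BasicBlock *B) const {
    return DT->properlyDominates(A, B);
  }
};

std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DomFirst(DT));

Dominance is only a partial order, so unrelated blocks keep their relative order under the stable sort; that is sufficient here because each candidate replacement is still checked with an explicit dominance query before any use is rewritten. (A follow-up patch later in this log switches the committed comparator from dominates() to properlyDominates() to satisfy the irreflexivity requirement.)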
The actual CSE is still quadratic, but I'm not confident that adding a scoped hash table here is worth it as we're only looking at the generated instructions and not arbitrary code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 57 +++++++++++++++++++----------- 1 file changed, 37 insertions(+), 20 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9a21653..9082b9d 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1620,9 +1620,22 @@ Value *BoUpSLP::vectorizeTree() { return VectorizableTree[0].VectorizedValue; } +class DTCmp { + const DominatorTree *DT; + +public: + DTCmp(const DominatorTree *DT) : DT(DT) {} + bool operator()(const BasicBlock *A, const BasicBlock *B) const { + return DT->dominates(A, B); + } +}; + void BoUpSLP::optimizeGatherSequence() { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); + // Keep a list of visited BBs to run CSE on. It is typically small. + SmallPtrSet VisitedBBs; + SmallVector CSEWorkList; // LICM InsertElementInst sequences. for (SetVector::iterator it = GatherSeq.begin(), e = GatherSeq.end(); it != e; ++it) { @@ -1631,6 +1644,9 @@ void BoUpSLP::optimizeGatherSequence() { if (!Insert) continue; + if (VisitedBBs.insert(Insert->getParent())) + CSEWorkList.push_back(Insert->getParent()); + // Check if this block is inside a loop. Loop *L = LI->getLoopFor(Insert->getParent()); if (!L) @@ -1655,45 +1671,46 @@ void BoUpSLP::optimizeGatherSequence() { Insert->moveBefore(PreHeader->getTerminator()); } + // Sort blocks by domination. This ensures we visit a block after all blocks + // dominating it are visited. + std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT)); + // Perform O(N^2) search over the gather sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. - SmallPtrSet Visited; - SmallVector ToRemove; - ReversePostOrderTraversal RPOT(F); - for (ReversePostOrderTraversal::rpo_iterator I = RPOT.begin(), - E = RPOT.end(); I != E; ++I) { + SmallVector Visited; + for (SmallVectorImpl::iterator I = CSEWorkList.begin(), + E = CSEWorkList.end(); + I != E; ++I) { + assert(I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I)) && + "Worklist not sorted properly!"); BasicBlock *BB = *I; - // For all instructions in the function: - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - Instruction *In = it; + // For all instructions in blocks containing gather sequences: + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { + Instruction *In = it++; if ((!isa(In) && !isa(In)) || !GatherSeq.count(In)) continue; // Check if we can replace this instruction with any of the // visited instructions. 
- for (SmallPtrSet::iterator v = Visited.begin(), - ve = Visited.end(); v != ve; ++v) { + for (SmallVectorImpl::iterator v = Visited.begin(), + ve = Visited.end(); + v != ve; ++v) { if (In->isIdenticalTo(*v) && DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); - ToRemove.push_back(In); + In->eraseFromParent(); In = 0; break; } } - if (In) - Visited.insert(In); + if (In) { + assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end()); + Visited.push_back(In); + } } } - - // Erase all of the instructions that we RAUWed. - for (SmallVectorImpl::iterator v = ToRemove.begin(), - ve = ToRemove.end(); v != ve; ++v) { - assert((*v)->getNumUses() == 0 && "Can't remove instructions with uses"); - (*v)->eraseFromParent(); - } } /// The SLPVectorizer Pass. -- cgit v1.1 From ec346c1314b19d4289cac5db8d81a89c2b40d3aa Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 3 Nov 2013 12:54:32 +0000 Subject: SLPVectorizer: Add a missing pair of parens. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193958 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9082b9d..7ab79ad 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1682,7 +1682,7 @@ void BoUpSLP::optimizeGatherSequence() { for (SmallVectorImpl::iterator I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { - assert(I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I)) && + assert((I == CSEWorkList.begin() || !DT->dominates(*I, *llvm::prior(I))) && "Worklist not sorted properly!"); BasicBlock *BB = *I; // For all instructions in blocks containing gather sequences: -- cgit v1.1 From eba6d384489be4e56718186aa7ed7e484df24613 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 4 Nov 2013 20:36:06 +0000 Subject: Scalarize select vector arguments when extracted. When the elements are extracted from a select on vectors or a vector select, do the select on the extracted scalars from the input if there is only one use. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194013 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineVectorOps.cpp | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 805c5d2..1e72410 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -282,6 +282,38 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } + } else if (SelectInst *SI = dyn_cast(I)) { + if (SI->hasOneUse()) { + // TODO: For a select on vectors, it might be useful to do this if it + // has multiple extractelement uses. For vector select, that seems to + // fight the vectorizer. + + // If we are extracting an element from a vector select or a select on + // vectors, a select on the scalars extracted from the vector arguments. 
+ Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + Value *Cond = SI->getCondition(); + if (Cond->getType()->isVectorTy()) { + Cond = Builder->CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); + } + + Value *V1Elem + = Builder->CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); + + Value *V2Elem + = Builder->CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); + return SelectInst::Create(Cond, + V1Elem, + V2Elem, + SI->getName() + ".elt"); + } } } return 0; -- cgit v1.1 From 63d8f88686e9bde3b6ee94f24c2ab1d4e1df3e4a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 4 Nov 2013 21:34:55 +0000 Subject: SLPVectorizer: Use properlyDominates to satisfy the irreflexivity of a strict weak ordering. STL debug mode checks this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7ab79ad..5c185f1 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1626,7 +1626,7 @@ class DTCmp { public: DTCmp(const DominatorTree *DT) : DT(DT) {} bool operator()(const BasicBlock *A, const BasicBlock *B) const { - return DT->dominates(A, B); + return DT->properlyDominates(A, B); } }; -- cgit v1.1 From 6f744ee498357155d3b0b6466b2f34a785c9c435 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Mon, 4 Nov 2013 21:44:01 +0000 Subject: Remove dead code git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 82a59ed..7e91897 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -97,12 +97,6 @@ INITIALIZE_PASS_END(GlobalOpt, "globalopt", ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } -namespace { - - - -} - /// isLeakCheckerRoot - Is this global variable possibly used by a leak checker /// as a root? If so, we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { -- cgit v1.1 From c88eb08d02f0aa17352e06c4e235bc1f225b2266 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Tue, 5 Nov 2013 00:08:03 +0000 Subject: Add a runtime unrolling parameter to the LoopUnroll pass constructor As with the other loop unrolling parameters (the unrolling threshold, partial unrolling, etc.) runtime unrolling can now also be controlled via the constructor. This will be necessary for moving non-trivial unrolling late in the pass manager (after loop vectorization). No functionality change intended. 
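A hedged usage sketch, with a hypothetical client function that is not part of the patch, showing how the new fourth parameter is meant to be driven; as with the other parameters, -1 defers to the command-line flag or target preference:

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

// Request runtime trip-count unrolling explicitly while leaving the
// threshold, count, and partial-unrolling knobs at their defaults.
static void addLateUnrolling(llvm::PassManager &PM) {
  PM.add(llvm::createLoopUnrollPass(/*Threshold=*/-1, /*Count=*/-1,
                                    /*AllowPartial=*/-1, /*Runtime=*/1));
}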
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index d47a3c3..08ac38d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -49,14 +49,16 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); CurrentCount = (C == -1) ? UnrollCount : unsigned(C); CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R; UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); UserAllowPartial = (P != -1) || (UnrollAllowPartial.getNumOccurrences() > 0); + UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); @@ -78,9 +80,11 @@ namespace { unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; + bool CurrentRuntime; bool UserCount; // CurrentCount is user-specified. bool UserThreshold; // CurrentThreshold is user-specified. bool UserAllowPartial; // CurrentAllowPartial is user-specified. + bool UserRuntime; // CurrentRuntime is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -115,8 +119,9 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { - return new LoopUnroll(Threshold, Count, AllowPartial); +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, + int Runtime) { + return new LoopUnroll(Threshold, Count, AllowPartial, Runtime); } /// ApproximateLoopSize - Approximate the size of the loop. @@ -155,7 +160,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { UP.OptSizeThreshold = OptSizeUnrollThreshold; UP.Count = CurrentCount; UP.Partial = CurrentAllowPartial; - UP.Runtime = UnrollRuntime; + UP.Runtime = CurrentRuntime; TTI.getUnrollingPreferences(L, UP); // Determine the current unrolling threshold. While this is normally set @@ -181,8 +186,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } - bool Runtime = UnrollRuntime.getNumOccurrences() == 0 ? - UP.Runtime : UnrollRuntime; + bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime; // Use a default unroll-count if the user doesn't specify a value // and the trip count is a run-time value. The default is different -- cgit v1.1 From f23af8bfd8609b9594947876209c3f0de070ea4f Mon Sep 17 00:00:00 2001 From: Michael Gottesman Date: Tue, 5 Nov 2013 16:02:40 +0000 Subject: [objc-arc] Convert the one directional retain/release relation assert to a conditional check + fail. Due to the previously added overflow checks, we can have a retain/release relation that is one directional. This occurs specifically when we run into an additive overflow causing us to drop state in only one direction. 
If that occurs, we should bail and not optimize that retain/release instead
of asserting.

Apologies for the size of the testcase. It is necessary to cause the additive
CFG overflow to trigger.

rdar://15377890

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194083 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index b419a7e..1cc665b 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2405,7 +2405,15 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap
       if (Jt == Releases.end())
         return false;
       const RRInfo &NewRetainReleaseRRI = Jt->second;
-      assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+
+      // If the release does not have a reference to the retain as well,
+      // something happened which is unaccounted for. Do not do anything.
+      //
+      // This can happen if we catch an additive overflow during path count
+      // merging.
+      if (!NewRetainReleaseRRI.Calls.count(NewRetain))
+        return false;
+
       if (ReleasesToMove.Calls.insert(NewRetainRelease)) {

         // If we overflow when we compute the path count, don't remove/move
@@ -2481,9 +2489,16 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap
       if (Jt == Retains.end())
         return false;
       const RRInfo &NewReleaseRetainRRI = Jt->second;
-      assert(NewReleaseRetainRRI.Calls.count(NewRelease));
-      if (RetainsToMove.Calls.insert(NewReleaseRetain)) {

+      // If the retain does not have a reference to the release as well,
+      // something happened which is unaccounted for. Do not do anything.
+      //
+      // This can happen if we catch an additive overflow during path count
+      // merging.
+      if (!NewReleaseRetainRRI.Calls.count(NewRelease))
+        return false;
+
+      if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
        // If we overflow when we compute the path count, don't remove/move
        // anything.
        const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
-- 
cgit v1.1

From ab09d1e0ead4ede09f22ef3645bbcf0785d667f8 Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Fri, 8 Nov 2013 19:58:21 +0000
Subject: Remove dead code from LoopUnswitch

LoopUnswitch's code simplification routine has logic to convert conditional
branches into unconditional branches, after unswitching makes the condition
constant, and then remove any blocks that this renders dead. Unfortunately,
this code is dead, currently broken, and furthermore, has never been alive
(at least as far back as 2006).

No functionality change intended.
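For context, the living equivalent of what the removed code attempted is the ConstantFoldTerminator utility from Transforms/Utils. A hedged sketch (the wrapper name foldConstantBranch is invented; the utility call itself exists in this era of the tree):

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"

// If BB ends in 'br i1 <constant>, %live, %dead', rewrite it to an
// unconditional branch and update the dead successor's PHI nodes. Deleting
// any blocks that become unreachable is left to a later CFG cleanup such as
// SimplifyCFG, rather than done inline as the removed code tried to do.
static bool foldConstantBranch(llvm::BasicBlock *BB) {
  return llvm::ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
}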
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194277 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnswitch.cpp | 127 --------------------------------- 1 file changed, 127 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 59aff31..c4ebfd5 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -212,8 +212,6 @@ namespace { Instruction *InsertPt); void SimplifyCode(std::vector &Worklist, Loop *L); - void RemoveBlockIfDead(BasicBlock *BB, - std::vector &Worklist, Loop *l); void RemoveLoopFromHierarchy(Loop *L); bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, BasicBlock **LoopExit = 0); @@ -946,114 +944,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop -/// information, and remove any dead successors it has. -/// -void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, - std::vector &Worklist, - Loop *L) { - if (pred_begin(BB) != pred_end(BB)) { - // This block isn't dead, since an edge to BB was just removed, see if there - // are any easy simplifications we can do now. - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - // If it has one pred, fold phi nodes in BB. - while (PHINode *PN = dyn_cast(BB->begin())) - ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); - - // If this is the header of a loop and the only pred is the latch, we now - // have an unreachable loop. - if (Loop *L = LI->getLoopFor(BB)) - if (loopHeader == BB && L->contains(Pred)) { - // Remove the branch from the latch to the header block, this makes - // the header dead, which will make the latch dead (because the header - // dominates the latch). - LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); - Pred->getTerminator()->eraseFromParent(); - new UnreachableInst(BB->getContext(), Pred); - - // The loop is now broken, remove it from LI. - RemoveLoopFromHierarchy(L); - - // Reprocess the header, which now IS dead. - RemoveBlockIfDead(BB, Worklist, L); - return; - } - - // If pred ends in a uncond branch, add uncond branch to worklist so that - // the two blocks will get merged. - if (BranchInst *BI = dyn_cast(Pred->getTerminator())) - if (BI->isUnconditional()) - Worklist.push_back(BI); - } - return; - } - - DEBUG(dbgs() << "Nuking dead block: " << *BB); - - // Remove the instructions in the basic block from the worklist. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - RemoveFromWorklist(I, Worklist); - - // Anything that uses the instructions in this basic block should have their - // uses replaced with undefs. - // If I is not void type then replaceAllUsesWith undef. - // This allows ValueHandlers and custom metadata to adjust itself. - if (!I->getType()->isVoidTy()) - I->replaceAllUsesWith(UndefValue::get(I->getType())); - } - - // If this is the edge to the header block for a loop, remove the loop and - // promote all subloops. - if (Loop *BBLoop = LI->getLoopFor(BB)) { - if (BBLoop->getLoopLatch() == BB) { - RemoveLoopFromHierarchy(BBLoop); - if (currentLoop == BBLoop) { - currentLoop = 0; - redoLoop = false; - } - } - } - - // Remove the block from the loop info, which removes it from any loops it - // was in. - LI->removeBlock(BB); - - // Remove phi node entries in successors for this block. 
- TerminatorInst *TI = BB->getTerminator(); - SmallVector Succs; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - Succs.push_back(TI->getSuccessor(i)); - TI->getSuccessor(i)->removePredecessor(BB); - } - - // Unique the successors, remove anything with multiple uses. - array_pod_sort(Succs.begin(), Succs.end()); - Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); - - // Remove the basic block, including all of the instructions contained in it. - LPM->deleteSimpleAnalysisValue(BB, L); - BB->eraseFromParent(); - // Remove successor blocks here that are not dead, so that we know we only - // have dead blocks in this list. Nondead blocks have a way of becoming dead, - // then getting removed before we revisit them, which is badness. - // - for (unsigned i = 0; i != Succs.size(); ++i) - if (pred_begin(Succs[i]) != pred_end(Succs[i])) { - // One exception is loop headers. If this block was the preheader for a - // loop, then we DO want to visit the loop so the loop gets deleted. - // We know that if the successor is a loop header, that this loop had to - // be the preheader: the case where this was the latch block was handled - // above and headers can only have two predecessors. - if (!LI->isLoopHeader(Succs[i])) { - Succs.erase(Succs.begin()+i); - --i; - } - } - - for (unsigned i = 0, e = Succs.size(); i != e; ++i) - RemoveBlockIfDead(Succs[i], Worklist, L); -} - /// RemoveLoopFromHierarchy - We have discovered that the specified loop has /// become unwrapped, either because the backedge was deleted, or because the /// edge into the header was removed. If the edge into the header from the @@ -1262,23 +1152,6 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { continue; } - if (ConstantInt *CB = dyn_cast(BI->getCondition())){ - // Conditional branch. Turn it into an unconditional branch, then - // remove dead blocks. - continue; // FIXME: Enable. 
- - DEBUG(dbgs() << "Folded branch: " << *BI); - BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); - BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); - DeadSucc->removePredecessor(BI->getParent(), true); - Worklist.push_back(BranchInst::Create(LiveSucc, BI)); - LPM->deleteSimpleAnalysisValue(BI, L); - BI->eraseFromParent(); - RemoveFromWorklist(BI, Worklist); - ++NumSimplify; - - RemoveBlockIfDead(DeadSucc, Worklist, L); - } continue; } } -- cgit v1.1 From 432bdf65719c521206daaf90970505bea027c944 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 10 Nov 2013 01:44:37 +0000 Subject: Teach MergeFunctions about address spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194342 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/MergeFunctions.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 7e4c1668..b8397d6 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -214,9 +214,12 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const { return true; if (Ty1->getTypeID() != Ty2->getTypeID()) { if (TD) { - LLVMContext &Ctx = Ty1->getContext(); - if (isa(Ty1) && Ty2 == TD->getIntPtrType(Ctx)) return true; - if (isa(Ty2) && Ty1 == TD->getIntPtrType(Ctx)) return true; + + if (isa(Ty1) && Ty2 == TD->getIntPtrType(Ty1)) + return true; + + if (isa(Ty2) && Ty1 == TD->getIntPtrType(Ty2)) + return true; } return false; } @@ -352,14 +355,19 @@ bool FunctionComparator::isEquivalentOperation(const Instruction *I1, // Determine whether two GEP operations perform the same underlying arithmetic. bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 1; - APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0); - if (TD && - GEP1->accumulateConstantOffset(*TD, Offset1) && - GEP2->accumulateConstantOffset(*TD, Offset2)) { - return Offset1 == Offset2; + unsigned AS = GEP1->getPointerAddressSpace(); + if (AS != GEP2->getPointerAddressSpace()) + return false; + + if (TD) { + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. + unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 1; + APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0); + if (GEP1->accumulateConstantOffset(*TD, Offset1) && + GEP2->accumulateConstantOffset(*TD, Offset2)) { + return Offset1 == Offset2; + } } if (GEP1->getPointerOperand()->getType() != -- cgit v1.1 From 30150a128c7b2488225331417153ccec75bac65c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 10 Nov 2013 04:13:31 +0000 Subject: SimplifyCFG has a heuristics for out-of-order processors that decides when it is worthwhile to merge branches. It tries to estimate if the operands of the instruction that we want to hoist are ready. This commit marks function arguments as 'ready' because they require no calculation. This boosts libquantum and a few other workloads from the testsuite. 
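The change itself is a single predicate in the hoisting heuristic; a hedged sketch of the surrounding loop, with the template arguments spelled out and names as in the diff below:

// Collect the operands that must still be computed before the hoisted
// instruction can issue. Constants were already treated as free; this
// commit treats function Arguments as free too, since they require no
// in-flight calculation.
for (Instruction::op_iterator OI = BonusInst->op_begin(),
                              OE = BonusInst->op_end();
     OI != OE; ++OI) {
  Value *V = *OI;
  if (!isa<Constant>(V) && !isa<Argument>(V))
    UsedValues.insert(V);
}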
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 1398697..d36d9dc 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2096,7 +2096,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (Instruction::op_iterator OI = BonusInst->op_begin(), OE = BonusInst->op_end(); OI != OE; ++OI) { Value *V = *OI; - if (!isa(V)) + if (!isa(V) && !isa(V)) UsedValues.insert(V); } -- cgit v1.1 From 6d9e013447efb7f9fbed8d3348d6dbde208f32a7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 10 Nov 2013 04:46:57 +0000 Subject: Use type form of getIntPtrType. This should be inconsequential and is work towards removing the default address space arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194347 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index d838851..a060c34 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1047,7 +1047,7 @@ struct MemSetOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) -- cgit v1.1 From 855c29d82c0358f43d1dc22f5330bb31a74adfd1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 10 Nov 2013 07:34:34 +0000 Subject: Revert "Resurrect r191017 " GVN proceeds in the presence of dead code" plus a fix to PR17307 & 17308." This causes PR17852. This reverts commit d93e8a06b2ca09ab18f390cd514b7443e2e571f7. Conflicts: test/Transforms/GVN/cond_br2.ll git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194348 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 174 ++---------------------------------------- 1 file changed, 6 insertions(+), 168 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 731a6d0..957c123 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" @@ -508,9 +507,7 @@ namespace { enum ValType { SimpleVal, // A simple offsetted value that is accessed. LoadVal, // A value produced by a load. - MemIntrin, // A memory intrinsic which is loaded from. - UndefVal // A UndefValue representing a value from dead block (which - // is not yet physically removed from the CFG). + MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. 
@@ -548,20 +545,10 @@ namespace { Res.Offset = Offset; return Res; } - - static AvailableValueInBlock getUndef(BasicBlock *BB) { - AvailableValueInBlock Res; - Res.BB = BB; - Res.Val.setPointer(0); - Res.Val.setInt(UndefVal); - Res.Offset = 0; - return Res; - } - + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } - bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -589,7 +576,6 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; - SetVector DeadBlocks; ValueTable VN; @@ -712,9 +698,6 @@ namespace { unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); - bool processFoldableCondBr(BranchInst *BI); - void addDeadBlock(BasicBlock *BB); - void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1272,10 +1255,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) { - assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); + LI->getParent())) return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); - } // Otherwise, we have to construct SSA form. SmallVector NewPHIs; @@ -1345,7 +1326,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else if (isMemIntrinValue()) { + } else { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1353,10 +1334,6 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); - } else { - assert(isUndefValue() && "Should be UndefVal"); - DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); - return UndefValue::get(LoadTy); } return Res; } @@ -1380,13 +1357,6 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); - if (DeadBlocks.count(DepBB)) { - // Dead dependent mem-op disguise as a load evaluating the same value - // as the load in question. - ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); - continue; - } - if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -2223,13 +2193,11 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - if (!BI->isConditional()) + if (!BI->isConditional() || isa(BI->getCondition())) return false; - if (isa(BI->getCondition())) - return processFoldableCondBr(BI); - Value *BranchCond = BI->getCondition(); + BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. @@ -2346,9 +2314,6 @@ bool GVN::runOnFunction(Function& F) { } if (EnablePRE) { - // Fabricate val-num for dead-code in order to suppress assertion in - // performPRE(). 
- assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2362,9 +2327,6 @@ bool GVN::runOnFunction(Function& F) { // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); - // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each - // iteration. - DeadBlocks.clear(); return Changed; } @@ -2375,9 +2337,6 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); - if (DeadBlocks.count(BB)) - return false; - bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2671,124 +2630,3 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } - -// BB is declared dead, which implied other blocks become dead as well. This -// function is to add all these blocks to "DeadBlocks". For the dead blocks' -// live successors, update their phi nodes by replacing the operands -// corresponding to dead blocks with UndefVal. -// -void GVN::addDeadBlock(BasicBlock *BB) { - SmallVector NewDead; - SmallSetVector DF; - - NewDead.push_back(BB); - while (!NewDead.empty()) { - BasicBlock *D = NewDead.pop_back_val(); - if (DeadBlocks.count(D)) - continue; - - // All blocks dominated by D are dead. - SmallVector Dom; - DT->getDescendants(D, Dom); - DeadBlocks.insert(Dom.begin(), Dom.end()); - - // Figure out the dominance-frontier(D). - for (SmallVectorImpl::iterator I = Dom.begin(), - E = Dom.end(); I != E; I++) { - BasicBlock *B = *I; - for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { - BasicBlock *S = *SI; - if (DeadBlocks.count(S)) - continue; - - bool AllPredDead = true; - for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) - if (!DeadBlocks.count(*PI)) { - AllPredDead = false; - break; - } - - if (!AllPredDead) { - // S could be proved dead later on. That is why we don't update phi - // operands at this moment. - DF.insert(S); - } else { - // While S is not dominated by D, it is dead by now. This could take - // place if S already have a dead predecessor before D is declared - // dead. - NewDead.push_back(S); - } - } - } - } - - // For the dead blocks' live successors, update their phi nodes by replacing - // the operands corresponding to dead blocks with UndefVal. - for(SmallSetVector::iterator I = DF.begin(), E = DF.end(); - I != E; I++) { - BasicBlock *B = *I; - if (DeadBlocks.count(B)) - continue; - - for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { - BasicBlock *P = *PI; - if (!DeadBlocks.count(P)) - continue; - for (BasicBlock::iterator II = B->begin(); isa(II); ++II) { - PHINode &Phi = cast(*II); - Phi.setIncomingValue(Phi.getBasicBlockIndex(P), - UndefValue::get(Phi.getType())); - } - } - } -} - -// If the given branch is recognized as a foldable branch (i.e. conditional -// branch with constant condition), it will perform following analyses and -// transformation. -// 1) If the dead out-coming edge is a critical-edge, split it. Let -// R be the target of the dead out-coming edge. -// 1) Identify the set of dead blocks implied by the branch's dead outcoming -// edge. The result of this step will be {X| X is dominated by R} -// 2) Identify those blocks which haves at least one dead prodecessor. The -// result of this step will be dominance-frontier(R). 
-// 3) Update the PHIs in DF(R) by replacing the operands corresponding to -// dead blocks with "UndefVal" in an hope these PHIs will optimized away. -// -// Return true iff *NEW* dead code are found. -bool GVN::processFoldableCondBr(BranchInst *BI) { - if (!BI || BI->isUnconditional()) - return false; - - ConstantInt *Cond = dyn_cast(BI->getCondition()); - if (!Cond) - return false; - - BasicBlock *DeadRoot = Cond->getZExtValue() ? - BI->getSuccessor(1) : BI->getSuccessor(0); - if (DeadBlocks.count(DeadRoot)) - return false; - - if (!DeadRoot->getSinglePredecessor()) - DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); - - addDeadBlock(DeadRoot); - return true; -} - -// performPRE() will trigger assert if it come across an instruciton without -// associated val-num. As it normally has far more live instructions than dead -// instructions, it makes more sense just to "fabricate" a val-number for the -// dead code than checking if instruction involved is dead or not. -void GVN::assignValNumForDeadCode() { - for (SetVector::iterator I = DeadBlocks.begin(), - E = DeadBlocks.end(); I != E; I++) { - BasicBlock *BB = *I; - for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); - II != EE; II++) { - Instruction *Inst = &*II; - unsigned ValNum = VN.lookup_or_add(Inst); - addToLeaderTable(ValNum, Inst, BB); - } - } -} -- cgit v1.1 From 4590b8c090dade63d6cb1ba39109331607036f4d Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Mon, 11 Nov 2013 13:37:10 +0000 Subject: [msan] Propagate origin for insertvalue, extractvalue. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194374 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 5ce88a9..512b809 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2059,7 +2059,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices()); DEBUG(dbgs() << " ResShadow: " << *ResShadow << "\n"); setShadow(&I, ResShadow); - setOrigin(&I, getCleanOrigin()); + setOriginForNaryOp(I); } void visitInsertValueInst(InsertValueInst &I) { @@ -2072,7 +2072,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices()); DEBUG(dbgs() << " Res: " << *Res << "\n"); setShadow(&I, Res); - setOrigin(&I, getCleanOrigin()); + setOriginForNaryOp(I); } void dumpInst(Instruction &I) { -- cgit v1.1 From 4921d5b0a9b70d201f18804452aeb62f1c41fcc3 Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Mon, 11 Nov 2013 16:27:35 +0000 Subject: Move debug message in vectorizer No functional change, just better reporting. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194388 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6db7f68..bc649b3 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -965,15 +965,12 @@ struct LoopVectorize : public LoopPass {
     unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
                                         VF.Cost);

-    if (VF.Width == 1) {
-      DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
-    }
-
     DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
           F->getParent()->getModuleIdentifier() << '\n');
     DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');

     if (VF.Width == 1) {
+      DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
       if (UF == 1)
         return false;
       // We decided not to vectorize, but we may want to unroll.
-- 
cgit v1.1

From 6c7a7c6474ea60c40e2dbb15f5b6cf0265098ace Mon Sep 17 00:00:00 2001
From: Shuxin Yang
Date: Mon, 11 Nov 2013 22:00:23 +0000
Subject: Fix PR17952.

The symptom is that an assertion is triggered. The assertion was added by me
to detect the situation when a value is propagated from dead blocks. (We can
certainly get rid of the assertion; it is safe to do so, because propagating
a value from a dead block to a live join node is certainly ok.)

The root cause of this bug is: edge-splitting is conducted on the fly, and
the edge being split could be a dead edge, therefore the block created to
split the critical edge needs to be flagged "dead" as well.

There are 3 ways to fix this bug:
  1) Get rid of the assertion, as mentioned earlier.
  2) When a dead edge is split, flag the inserted block "dead".
  3) Proactively split the critical edges connecting dead and live blocks
     when new dead blocks are revealed.

This fix goes for 3), with an additional 2 LOC. A test case was added by
Rafael the other day.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194424 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/GVN.cpp | 181 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 175 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 957c123..d353765 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
@@ -507,7 +508,9 @@ namespace {
   enum ValType {
     SimpleVal,  // A simple offsetted value that is accessed.
     LoadVal,    // A value produced by a load.
-    MemIntrin   // A memory intrinsic which is loaded from.
+    MemIntrin,  // A memory intrinsic which is loaded from.
+    UndefVal    // A UndefValue representing a value from dead block (which
+                // is not yet physically removed from the CFG).
   };

   /// V - The value that is live out of the block.
@@ -545,10 +548,20 @@ namespace { Res.Offset = Offset; return Res; } - + + static AvailableValueInBlock getUndef(BasicBlock *BB) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(0); + Res.Val.setInt(UndefVal); + Res.Offset = 0; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + bool isUndefValue() const { return Val.getInt() == UndefVal; } Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); @@ -576,6 +589,7 @@ namespace { DominatorTree *DT; const DataLayout *TD; const TargetLibraryInfo *TLI; + SetVector DeadBlocks; ValueTable VN; @@ -698,6 +712,9 @@ namespace { unsigned replaceAllDominatedUsesWith(Value *From, Value *To, const BasicBlockEdge &Root); bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool processFoldableCondBr(BranchInst *BI); + void addDeadBlock(BasicBlock *BB); + void assignValNumForDeadCode(); }; char GVN::ID = 0; @@ -1255,8 +1272,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, - LI->getParent())) + LI->getParent())) { + assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block"); return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); + } // Otherwise, we have to construct SSA form. SmallVector NewPHIs; @@ -1326,7 +1345,7 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); } - } else { + } else if (isMemIntrinValue()) { const DataLayout *TD = gvn.getDataLayout(); assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, @@ -1334,6 +1353,10 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); + } else { + assert(isUndefValue() && "Should be UndefVal"); + DEBUG(dbgs() << "GVN COERCED NONLOCAL Undef:\n";); + return UndefValue::get(LoadTy); } return Res; } @@ -1357,6 +1380,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, BasicBlock *DepBB = Deps[i].getBB(); MemDepResult DepInfo = Deps[i].getResult(); + if (DeadBlocks.count(DepBB)) { + // Dead dependent mem-op disguise as a load evaluating the same value + // as the load in question. + ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); + continue; + } + if (!DepInfo.isDef() && !DepInfo.isClobber()) { UnavailableBlocks.push_back(DepBB); continue; @@ -2193,11 +2223,13 @@ bool GVN::processInstruction(Instruction *I) { // For conditional branches, we can perform simple conditional propagation on // the condition value itself. if (BranchInst *BI = dyn_cast(I)) { - if (!BI->isConditional() || isa(BI->getCondition())) + if (!BI->isConditional()) return false; - Value *BranchCond = BI->getCondition(); + if (isa(BI->getCondition())) + return processFoldableCondBr(BI); + Value *BranchCond = BI->getCondition(); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); // Avoid multiple edges early. 
@@ -2314,6 +2346,9 @@ bool GVN::runOnFunction(Function& F) { } if (EnablePRE) { + // Fabricate val-nums for dead code in order to suppress the assertion in + // performPRE(). + assignValNumForDeadCode(); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2327,6 +2362,9 @@ bool GVN::runOnFunction(Function& F) { // Actually, when this happens, we should just fully integrate PRE into GVN. cleanupGlobalSets(); + // Do not clean up DeadBlocks in cleanupGlobalSets() as it's called for each + // iteration. + DeadBlocks.clear(); return Changed; } @@ -2337,6 +2375,9 @@ bool GVN::processBlock(BasicBlock *BB) { // (and incrementing BI before processing an instruction). assert(InstrsToErase.empty() && "We expect InstrsToErase to be empty across iterations"); + if (DeadBlocks.count(BB)) + return false; + bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); @@ -2630,3 +2671,131 @@ void GVN::verifyRemoved(const Instruction *Inst) const { } } } + +// BB is declared dead, which implies that other blocks become dead as well. +// This function adds all such blocks to "DeadBlocks". For the dead blocks' +// live successors, update their phi nodes by replacing the operands +// corresponding to dead blocks with UndefVal. +// +void GVN::addDeadBlock(BasicBlock *BB) { + SmallVector<BasicBlock *, 4> NewDead; + SmallSetVector<BasicBlock *, 4> DF; + + NewDead.push_back(BB); + while (!NewDead.empty()) { + BasicBlock *D = NewDead.pop_back_val(); + if (DeadBlocks.count(D)) + continue; + + // All blocks dominated by D are dead. + SmallVector<BasicBlock *, 8> Dom; + DT->getDescendants(D, Dom); + DeadBlocks.insert(Dom.begin(), Dom.end()); + + // Figure out the dominance-frontier(D). + for (SmallVectorImpl<BasicBlock *>::iterator I = Dom.begin(), + E = Dom.end(); I != E; I++) { + BasicBlock *B = *I; + for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { + BasicBlock *S = *SI; + if (DeadBlocks.count(S)) + continue; + + bool AllPredDead = true; + for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) + if (!DeadBlocks.count(*PI)) { + AllPredDead = false; + break; + } + + if (!AllPredDead) { + // S could be proved dead later on. That is why we don't update phi + // operands at this moment. + DF.insert(S); + } else { + // While S is not dominated by D, it is dead by now. This could happen + // if S already has a dead predecessor before D is declared + // dead. + NewDead.push_back(S); + } + } + } + } + + // For the dead blocks' live successors, update their phi nodes by replacing + // the operands corresponding to dead blocks with UndefVal. + for(SmallSetVector<BasicBlock *, 4>::iterator I = DF.begin(), E = DF.end(); + I != E; I++) { + BasicBlock *B = *I; + if (DeadBlocks.count(B)) + continue; + + for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { + BasicBlock *P = *PI; + + if (!DeadBlocks.count(P)) + continue; + + if (isCriticalEdge(P->getTerminator(), GetSuccessorNumber(P, B))) { + if (BasicBlock *S = splitCriticalEdges(P, B)) + DeadBlocks.insert(P = S); + } + + for (BasicBlock::iterator II = B->begin(); isa<PHINode>(II); ++II) { + PHINode &Phi = cast<PHINode>(*II); + Phi.setIncomingValue(Phi.getBasicBlockIndex(P), + UndefValue::get(Phi.getType())); + } + } + } +} + +// If the given branch is recognized as a foldable branch (i.e. a conditional +// branch with a constant condition), it will perform the following analyses +// and transformations. +// 1) If the dead outgoing edge is a critical edge, split it. Let +// R be the target of the dead outgoing edge.
+// 2) Identify the set of dead blocks implied by the branch's dead outgoing +// edge. The result of this step will be {X | X is dominated by R}. +// 3) Identify those blocks which have at least one dead predecessor. The +// result of this step will be dominance-frontier(R). +// 4) Update the PHIs in DF(R) by replacing the operands corresponding to +// dead blocks with "UndefVal" in the hope that these PHIs will be optimized +// away. +// +// Return true iff *NEW* dead code is found. +bool GVN::processFoldableCondBr(BranchInst *BI) { + if (!BI || BI->isUnconditional()) + return false; + + ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); + if (!Cond) + return false; + + BasicBlock *DeadRoot = Cond->getZExtValue() ? + BI->getSuccessor(1) : BI->getSuccessor(0); + if (DeadBlocks.count(DeadRoot)) + return false; + + if (!DeadRoot->getSinglePredecessor()) + DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot); + + addDeadBlock(DeadRoot); + return true; +} + +// performPRE() will trigger an assert if it comes across an instruction +// without an associated val-num. As a function normally has far more live +// instructions than dead instructions, it makes more sense just to +// "fabricate" a val-num for the dead code than to check whether the +// instruction involved is dead or not. +void GVN::assignValNumForDeadCode() { + for (SetVector<BasicBlock *>::iterator I = DeadBlocks.begin(), + E = DeadBlocks.end(); I != E; I++) { + BasicBlock *BB = *I; + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); + II != EE; II++) { + Instruction *Inst = &*II; + unsigned ValNum = VN.lookup_or_add(Inst); + addToLeaderTable(ValNum, Inst, BB); + } + } +} -- cgit v1.1 From f42264e7e4cb947bcd1ff7dd48fd0ba4e9fbbf9f Mon Sep 17 00:00:00 2001 From: Yuchen Wu Date: Tue, 12 Nov 2013 04:59:08 +0000 Subject: llvm-cov: Added call to update run/program counts. Also updated test files that were generated from this change.
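The instrumentation side of this change only declares and calls a void()-typed symbol named llvm_gcda_summary_info (see the getOrInsertFunction call in the diff below); the actual run/program counting lives in compiler-rt. As a hedged sketch of the contract — not the real runtime — any C-linkage definition with that signature can stand in for it during experimentation:

    #include <cstdio>

    // Hypothetical stand-in for the runtime hook the generated writeout
    // function now calls once per emitted .gcda file, between the
    // emit_arcs calls and end_file. The real compiler-rt implementation
    // updates the run/program counts in the file's object summary; this
    // stub merely observes the calls.
    extern "C" void llvm_gcda_summary_info() {
      static unsigned Calls = 0;
      std::fprintf(stderr, "llvm_gcda_summary_info: call #%u\n", ++Calls);
    }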
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 4e9afee..206bffb 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -102,6 +102,7 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getSummaryInfoFunc(); Constant *getDeleteWriteoutFunctionListFunc(); Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); @@ -700,6 +701,11 @@ Constant *GCOVProfiler::getEmitArcsFunc() { return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } +Constant *GCOVProfiler::getSummaryInfoFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_summary_info", FTy); +} + Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); @@ -746,6 +752,7 @@ Function *GCOVProfiler::insertCounterWriteout( Constant *StartFile = getStartFileFunc(); Constant *EmitFunction = getEmitFunctionFunc(); Constant *EmitArcs = getEmitArcsFunc(); + Constant *SummaryInfo = getSummaryInfoFunc(); Constant *EndFile = getEndFileFunc(); NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); @@ -772,6 +779,7 @@ Function *GCOVProfiler::insertCounterWriteout( Builder.getInt32(Arcs), Builder.CreateConstGEP2_64(GV, 0, 0)); } + Builder.CreateCall(SummaryInfo); Builder.CreateCall(EndFile); } } -- cgit v1.1 From e26299d76e4b1ef0fa9bfc89d55dd1a28a1220f4 Mon Sep 17 00:00:00 2001 From: Shuxin Yang Date: Tue, 12 Nov 2013 08:33:03 +0000 Subject: Correct a glitch in r194424 which may invalidate an iterator. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/GVN.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index d353765..6af269d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2730,7 +2730,9 @@ void GVN::addDeadBlock(BasicBlock *BB) { if (DeadBlocks.count(B)) continue; - for (pred_iterator PI = pred_begin(B), PE = pred_end(B); PI != PE; PI++) { + SmallVector<BasicBlock *, 4> Preds(pred_begin(B), pred_end(B)); + for (SmallVectorImpl<BasicBlock *>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; PI++) { BasicBlock *P = *PI; if (!DeadBlocks.count(P)) -- cgit v1.1 From f681437cb082bf6fb5da43c8acd4e1313ba3b213 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 12 Nov 2013 12:24:36 +0000 Subject: SimplifyCFG: Use existing constant folding logic when forming switch tables. Both simpler and more powerful than the hand-rolled folding logic.
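The generic helpers come from llvm/Analysis/ConstantFolding.h. A condensed sketch of the new folding path (assuming the 2013-era signatures that take a possibly-null const DataLayout *; this mirrors, but is not verbatim, the pass code in the diff below):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Sketch: once every operand of I has been resolved to a Constant
    // (collected into COps by looking them up in the constant pool), defer
    // to the shared folding logic instead of hand-rolling per-opcode
    // cases. Compares need the predicate-taking entry point; everything
    // else goes through the generic one.
    static Constant *foldWithKnownOperands(Instruction *I,
                                           ArrayRef<Constant *> COps,
                                           const DataLayout *DL) {
      if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
        return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
                                               COps[1], DL);
      return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
    }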
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 51 ++++++++++++++---------------------- 1 file changed, 20 insertions(+), 31 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d36d9dc..d56bb32 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3323,28 +3324,10 @@ static Constant *LookupConstant(Value *V, /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. -static Constant *ConstantFold(Instruction *I, - const SmallDenseMap& ConstantPool) { - if (BinaryOperator *BO = dyn_cast(I)) { - Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::get(BO->getOpcode(), A, B); - } - - if (CmpInst *Cmp = dyn_cast(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) - return 0; - Constant *B = LookupConstant(I->getOperand(1), ConstantPool); - if (!B) - return 0; - return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); - } - +static Constant * +ConstantFold(Instruction *I, + const SmallDenseMap &ConstantPool, + const DataLayout *DL) { if (SelectInst *Select = dyn_cast(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) @@ -3356,14 +3339,19 @@ static Constant *ConstantFold(Instruction *I, return 0; } - if (CastInst *Cast = dyn_cast(I)) { - Constant *A = LookupConstant(I->getOperand(0), ConstantPool); - if (!A) + SmallVector COps; + for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { + if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) + COps.push_back(A); + else return 0; - return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); } - return 0; + if (CmpInst *Cmp = dyn_cast(I)) + return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], + COps[1], DL); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL); } /// GetCaseResults - Try to determine the resulting constant values in phi nodes @@ -3375,7 +3363,8 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, - SmallVectorImpl > &Res) { + SmallVectorImpl > &Res, + const DataLayout *DL) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); @@ -3394,7 +3383,7 @@ GetCaseResults(SwitchInst *SI, } else if (isa(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, ConstantPool)) { + } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) { // Instruction is side-effect free and constant. 
ConstantPool.insert(std::make_pair(I, C)); } else { @@ -3718,7 +3707,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultsTy; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, - Results)) + Results, TD)) return false; // Append the result from this case to the list for each phi. @@ -3732,7 +3721,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Get the resulting values for the default case. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList)) + DefaultResultsList, TD)) return false; for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; -- cgit v1.1 From 46456f6a2ff5b81a3ea60bdcf9a0813c9fa4257f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Tue, 12 Nov 2013 20:21:43 +0000 Subject: Correctly merge constants with explicit and implicit alignments. ConstantMerge can merge a constant with implicit alignment with one that has explicit alignment. Before this change it was assuming that the explicit alignment was higher than the implicit one, causing the result to be under-aligned in some cases. Fixes PR17815. Patch by Chris Smowton! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194506 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/ConstantMerge.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index a7bf188..d94c0f4 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -93,9 +93,12 @@ bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const { } unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { + unsigned Align = GV->getAlignment(); + if (Align) + return Align; if (TD) return TD->getPreferredAlignment(GV); - return GV->getAlignment(); + return 0; } bool ConstantMerge::runOnModule(Module &M) { @@ -210,9 +213,9 @@ bool ConstantMerge::runOnModule(Module &M) { // Bump the alignment if necessary. if (Replacements[i].first->getAlignment() || Replacements[i].second->getAlignment()) { - Replacements[i].second->setAlignment(std::max( - Replacements[i].first->getAlignment(), - Replacements[i].second->getAlignment())); + Replacements[i].second->setAlignment( + std::max(getAlignment(Replacements[i].first), + getAlignment(Replacements[i].second))); } // Eliminate any uses of the dead global. -- cgit v1.1 From f3bd3ea3fe1abf414d3a38a684f3df7382a1a8d5 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 12 Nov 2013 22:37:16 +0000 Subject: FoldBranchToCommonDest merges branches into a single branch with an or/and of the conditions. It has a heuristic for estimating when some of the dependencies are processed by out-of-order processors. This patch adds another rule to the heuristic that says that if the "BonusInstruction" that we speculatively execute is used by the condition of the second branch then it is okay to hoist it. This change exposes more opportunities for other passes to transform the code. It does not matter that much that we if-convert the code because the SelectionDAG builder splits or/and branches into multiple branches when profitable.
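A rough source-level illustration of the shape this rule targets (hypothetical input, not taken from the patch): the computation feeding the second branch's condition is the bonus instruction, and because its only use is that condition, it is now eligible for hoisting:

    // Hypothetical C++ whose IR has the shape this heuristic cares about.
    // "t" is the bonus instruction: it has a single use, the second
    // branch's condition, so SimplifyCFG may now speculate it into the
    // first block and merge the two branches into one branch on a
    // combined or-ed condition reaching the common "return 0".
    int classify(int a, int b) {
      if (a == 0)
        return 0;            // first conditional branch, common destination
      bool t = (b > 41);     // bonus instruction feeding the next condition
      if (!t)
        return 0;            // second conditional branch, same destination
      return 1;
    }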
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194524 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d56bb32..ff50b12 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2090,8 +2090,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the - // out-of-order core by speculating them earlier. - if (BonusInst) { + // out-of-order core by speculating them earlier. We also allow + // instructions that are used by the terminator's condition because it + // exposes more merging opportunities. + bool UsedByBranch = (BonusInst && BonusInst->hasOneUse() && + *BonusInst->use_begin() == Cond); + + if (BonusInst && !UsedByBranch) { // Collect the values used by the bonus inst SmallPtrSet<Value*, 4> UsedValues; for (Instruction::op_iterator OI = BonusInst->op_begin(), -- cgit v1.1 From 6c84f7ad2dbba5d1afa26a4929c3b032ae9e7e10 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 12 Nov 2013 22:38:59 +0000 Subject: Fold (iszero(A&K1) | iszero(A&K2)) -> (A&(K1|K2)) != (K1|K2) if we know that K1 and K2 are 'one-hot' (only one bit is on). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 53 ++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 98a7b2d..9603f22 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1543,10 +1543,60 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return 0; } +/// IsSingleBitValue - Returns true for "one-hot" values (values where at most +/// one bit can be set). +static bool IsOneHotValue(Value *V) { + // Match 1<<K. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) + if (BO->getOpcode() == Instruction::Shl) { + ConstantInt *One = dyn_cast<ConstantInt>(BO->getOperand(0)); + return One && One->isOne(); + } + + // Check for power of two integer constants. + if (ConstantInt *K = dyn_cast<ConstantInt>(V)) + return K->getValue().isPowerOf2(); + + return false; +} + /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) + // if K1 and K2 are a one-bit mask.
+ ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); + ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); + + if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() && + RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) { + + BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0)); + BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0)); + if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && + LAnd->getOpcode() == Instruction::And && + RAnd->getOpcode() == Instruction::And) { + + Value *Mask = 0; + Value *Masked = 0; + if (LAnd->getOperand(0) == RAnd->getOperand(0) && + IsOneHotValue(LAnd->getOperand(1)) && + IsOneHotValue(RAnd->getOperand(1))) { + Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); + Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); + } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && + IsOneHotValue(LAnd->getOperand(0)) && + IsOneHotValue(RAnd->getOperand(0))) { + Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); + Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); + } + + if (Masked) + return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); + } + } + // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (PredicatesFoldable(LHSCC, RHSCC)) { if (LHS->getOperand(0) == RHS->getOperand(1) && @@ -1567,9 +1617,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { return V; Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); - ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); - ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); - if (LHS->hasOneUse() || RHS->hasOneUse()) { // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) -- cgit v1.1 From 0d833348c2dea181e08d3ece8da18079653f96ee Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 13 Nov 2013 01:12:01 +0000 Subject: Update the docs to match the function name. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9603f22..88bb69b 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1543,7 +1543,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return 0; } -/// IsSingleBitValue - Returns true for "one-hot" values (values where at most +/// IsOneHotValue - Returns true for "one-hot" values (values where at most /// one bit can be set). static bool IsOneHotValue(Value *V) { // Match 1<<K. -- cgit v1.1 From: Diego Novillo Date: Wed, 13 Nov 2013 12:22:21 +0000 Subject: SampleProfileLoader pass. Initial setup. This adds a new scalar pass that reads a file with samples generated by 'perf' during runtime. The samples read from the profile are incorporated and emitted as IR metadata reflecting that profile. The profile file is assumed to have been generated by an external profile source. The profile information is converted into IR metadata, which is later used by the analysis routines to estimate block frequencies, edge weights and other related data. External profile information files have no fixed format; each profiler is free to define its own. This includes both the on-disk representation of the profile and the kind of profile information stored in the file.
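The core loader interface this design implies is small; a condensed sketch using the method names the patch introduces (loadText, loadNative, emitAnnotations — the real class appears in the diff below, and this layout is illustrative rather than exact):

    // Condensed sketch of the profiler interface described above. Method
    // names come from the patch; everything else here is illustrative.
    class Function;  // stand-in for llvm::Function in this sketch

    class ExternalProfile {
    public:
      virtual ~ExternalProfile() {}
      // Parse the debugging-friendly text representation of the profile.
      virtual void loadText() = 0;
      // Parse the profiler-defined on-disk ("native") representation.
      virtual void loadNative() = 0;
      // Translate the loaded profile into IR metadata for function F.
      virtual bool emitAnnotations(Function &F) = 0;
    };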
A common kind of profile is based on sampling (e.g., perf), which essentially counts how many times each line of the program has been executed during the run. The SampleProfileLoader pass is organized as a scalar transformation. On startup, it reads the file given in -sample-profile-file to determine what kind of profile it contains. This file is assumed to contain profile information for the whole application. The profile data in the file is read and incorporated into the internal state of the corresponding profiler. To facilitate testing, I've organized the profilers to support two file formats: text and native. The native format is whatever on-disk representation the profiler wants to support; I expect this will mostly be bitcode files, but it could be anything the profiler wants to support. To do this, every profiler must implement the SampleProfile::loadNative() function. The text format is mostly meant for debugging. Records are separated by newlines, but each profiler is free to interpret records as it sees fit. Profilers must implement the SampleProfile::loadText() function. Finally, the pass will call SampleProfile::emitAnnotations() for each function in the current translation unit. This function needs to translate the loaded profile into IR metadata, which the analyzer will later be able to use. This patch implements the first steps towards the above design. I've implemented a sample-based flat profiler. The format of the profile is fairly simplistic. Each sampled function contains a list of relative line locations (from the start of the function) together with a count representing how many samples were collected at that line during execution. I generate this profile using perf and a separate converter tool. Currently, I have only implemented a text format for these profiles. I am interested in initial feedback on the whole approach before I send the other parts of the implementation for review. This patch implements: - The SampleProfileLoader pass. - The base ExternalProfile class with the core interface. - A SampleProfile sub-class using the above interface. The profiler generates branch weight metadata on every branch instruction that matches the profile. - A text loader class to assist the implementation of SampleProfile::loadText(). - Basic unit tests for the pass. Additionally, the patch uses profile information to compute branch weights based on instruction samples. This patch converts instruction samples into branch weights. It does a fairly simplistic conversion: Given a multi-way branch instruction, it calculates the weight of each branch based on the maximum sample count gathered from each target basic block. Note that this assignment of branch weights is somewhat lossy and can be misleading. If a basic block has more than one incoming branch, all the incoming branches will get the same weight. In reality, it may be that only one of them is the most heavily taken branch. I will adjust this assignment in subsequent patches.
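To make the text format concrete before the code, here is a hypothetical profile the text loader described further below would accept (one mangled function with 1200 total samples, 50 head samples, and two sampled line offsets; all names and counts are invented):

    symbol table
    1
    _Z4worki
    _Z4worki:1200:50:2
    1: 200
    3: 1200

A file like this would then be supplied through the options this patch adds, e.g. opt -sample-profile -sample-profile-file=work.prof, over IR built with -gline-tables-only so instructions can be matched back to line offsets.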
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/CMakeLists.txt | 1 + lib/Transforms/Scalar/SampleProfile.cpp | 479 ++++++++++++++++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 1 + 3 files changed, 481 insertions(+) create mode 100644 lib/Transforms/Scalar/SampleProfile.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 3b89fd4..ee45600 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMScalarOpts PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp + SampleProfile.cpp SCCP.cpp SROA.cpp Scalar.cpp diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp new file mode 100644 index 0000000..f549185 --- /dev/null +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -0,0 +1,479 @@ +//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SampleProfileLoader transformation. This pass +// reads a profile file generated by a sampling profiler (e.g. Linux Perf - +// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the +// profile information in the given profile. +// +// This pass generates branch weight annotations on the IR: +// +// - prof: Represents branch weights. This annotation is added to branches +// to indicate the weights of each edge coming out of the branch. +// The weight of each edge is the weight of the target block for +// that edge. The weight of a block B is computed as the maximum +// number of samples found in B. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sample-profile" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +// Command line option to specify the file to read samples from. This is +// mainly used for debugging. +static cl::opt SampleProfileFile( + "sample-profile-file", cl::init(""), cl::value_desc("filename"), + cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); + +namespace { +/// \brief Sample-based profile reader. +/// +/// Each profile contains sample counts for all the functions +/// executed. Inside each function, statements are annotated with the +/// collected samples on all the instructions associated with that +/// statement. +/// +/// For this to produce meaningful data, the program needs to be +/// compiled with some debug information (at minimum, line numbers: +/// -gline-tables-only). 
Otherwise, it will be impossible to match IR +/// instructions to the line numbers collected by the profiler. +/// +/// From the profile file, we are interested in collecting the +/// following information: +/// +/// * A list of functions included in the profile (mangled names). +/// +/// * For each function F: +/// 1. The total number of samples collected in F. +/// +/// 2. The samples collected at each line in F. To provide some +/// protection against source code shuffling, line numbers should +/// be relative to the start of the function. +class SampleProfile { +public: + SampleProfile(StringRef F) : Profiles(0), Filename(F) {} + + virtual void dump(); + virtual void loadText(); + virtual void loadNative() { llvm_unreachable("not implemented"); } + virtual bool emitAnnotations(Function &F); + void printFunctionProfile(raw_ostream &OS, StringRef FName); + void dumpFunctionProfile(StringRef FName); + +protected: + typedef DenseMap BodySampleMap; + typedef DenseMap BlockWeightMap; + + /// \brief Representation of the runtime profile for a function. + /// + /// This data structure contains the runtime profile for a given + /// function. It contains the total number of samples collected + /// in the function and a map of samples collected in every statement. + struct FunctionProfile { + /// \brief Total number of samples collected inside this function. + /// + /// Samples are cumulative, they include all the samples collected + /// inside this function and all its inlined callees. + unsigned TotalSamples; + + // \brief Total number of samples collected at the head of the function. + unsigned TotalHeadSamples; + + /// \brief Map line offsets to collected samples. + /// + /// Each entry in this map contains the number of samples + /// collected at the corresponding line offset. All line locations + /// are an offset from the start of the function. + BodySampleMap BodySamples; + + /// \brief Map basic blocks to their computed weights. + /// + /// The weight of a basic block is defined to be the maximum + /// of all the instruction weights in that block. + BlockWeightMap BlockWeights; + }; + + uint32_t getInstWeight(Instruction &I, unsigned FirstLineno, + BodySampleMap &BodySamples); + uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno, + BodySampleMap &BodySamples); + + /// \brief Map every function to its associated profile. + /// + /// The profile of every function executed at runtime is collected + /// in the structure FunctionProfile. This maps function objects + /// to their corresponding profiles. + StringMap Profiles; + + /// \brief Path name to the file holding the profile data. + /// + /// The format of this file is defined by each profiler + /// independently. If possible, the profiler should have a text + /// version of the profile format to be used in constructing test + /// cases and debugging. + StringRef Filename; +}; + +/// \brief Loader class for text-based profiles. +/// +/// This class defines a simple interface to read text files containing +/// profiles. It keeps track of line number information and location of +/// the file pointer. Users of this class are responsible for actually +/// parsing the lines returned by the readLine function. +/// +/// TODO - This does not really belong here. It is a generic text file +/// reader. It should be moved to the Support library and made more general. 
+class ExternalProfileTextLoader { +public: + ExternalProfileTextLoader(StringRef F) : Filename(F) { + error_code EC; + EC = MemoryBuffer::getFile(Filename, Buffer); + if (EC) + report_fatal_error("Could not open profile file " + Filename + ": " + + EC.message()); + FP = Buffer->getBufferStart(); + Lineno = 0; + } + + /// \brief Read a line from the mapped file. + StringRef readLine() { + size_t Length = 0; + const char *start = FP; + while (FP != Buffer->getBufferEnd() && *FP != '\n') { + Length++; + FP++; + } + if (FP != Buffer->getBufferEnd()) + FP++; + Lineno++; + return StringRef(start, Length); + } + + /// \brief Return true if we've reached EOF. + bool atEOF() const { return FP == Buffer->getBufferEnd(); } + + /// \brief Report a parse error message and stop compilation. + void reportParseError(Twine Msg) const { + report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n"); + } + +private: + /// \brief Memory buffer holding the text file. + OwningPtr<MemoryBuffer> Buffer; + + /// \brief Current position into the memory buffer. + const char *FP; + + /// \brief Current line number. + int64_t Lineno; + + /// \brief Path name of the profile file. + StringRef Filename; +}; + +/// \brief Sample profile pass. +/// +/// This pass reads profile data from the file specified by +/// -sample-profile-file and annotates every affected function with the +/// profile information found in that file. +class SampleProfileLoader : public FunctionPass { +public: + // Class identification, replacement for typeinfo + static char ID; + + SampleProfileLoader(StringRef Name = SampleProfileFile) + : FunctionPass(ID), Profiler(0), Filename(Name) { + initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); + } + + virtual bool doInitialization(Module &M); + + void dump() { Profiler->dump(); } + + virtual const char *getPassName() const { return "Sample profile pass"; } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + +protected: + /// \brief Profile reader object. + OwningPtr<SampleProfile> Profiler; + + /// \brief Name of the profile file to load. + StringRef Filename; +}; +} + +/// \brief Print the function profile for \p FName on stream \p OS. +/// +/// \param OS Stream to emit the output to. +/// \param FName Name of the function to print. +void SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) { + FunctionProfile FProfile = Profiles[FName]; + OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", " + << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size() + << " sampled lines\n"; + for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(), + SE = FProfile.BodySamples.end(); + SI != SE; ++SI) + OS << "\tline offset: " << SI->first + << ", number of samples: " << SI->second << "\n"; + OS << "\n"; +} + +/// \brief Dump the function profile for \p FName. +/// +/// \param FName Name of the function to print. +void SampleProfile::dumpFunctionProfile(StringRef FName) { + printFunctionProfile(dbgs(), FName); +} + +/// \brief Dump all the function profiles found. +void SampleProfile::dump() { + for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(), + E = Profiles.end(); + I != E; ++I) + dumpFunctionProfile(I->getKey()); +} + +/// \brief Load samples from a text file. +/// +/// The file is divided into two segments: +/// +/// Symbol table (represented with the string "symbol table") +/// Number of symbols in the table +/// symbol 1 +/// symbol 2 +/// ...
+/// symbol N +/// +/// Function body profiles +/// function1:total_samples:total_head_samples:number_of_locations +/// location_offset_1: number_of_samples +/// location_offset_2: number_of_samples +/// ... +/// location_offset_N: number_of_samples +/// +/// Function names must be mangled in order for the profile loader to +/// match them in the current translation unit. +/// +/// Since this is a flat profile, a function that shows up more than +/// once gets all its samples aggregated across all its instances. +/// TODO - flat profiles are too imprecise to provide good optimization +/// opportunities. Convert them to context-sensitive profile. +/// +/// This textual representation is useful to generate unit tests and +/// for debugging purposes, but it should not be used to generate +/// profiles for large programs, as the representation is extremely +/// inefficient. +void SampleProfile::loadText() { + ExternalProfileTextLoader Loader(Filename); + + // Read the symbol table. + StringRef Line = Loader.readLine(); + if (Line != "symbol table") + Loader.reportParseError("Expected 'symbol table', found " + Line); + int NumSymbols; + Line = Loader.readLine(); + if (Line.getAsInteger(10, NumSymbols)) + Loader.reportParseError("Expected a number, found " + Line); + for (int I = 0; I < NumSymbols; I++) { + StringRef FName = Loader.readLine(); + FunctionProfile &FProfile = Profiles[FName]; + FProfile.BodySamples.clear(); + FProfile.TotalSamples = 0; + FProfile.TotalHeadSamples = 0; + } + + // Read the profile of each function. Since each function may be + // mentioned more than once, and we are collecting flat profiles, + // accumulate samples as we parse them. + Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$"); + Regex LineSample("^([0-9]+): ([0-9]+)$"); + while (!Loader.atEOF()) { + SmallVector Matches; + Line = Loader.readLine(); + if (!HeadRE.match(Line, &Matches)) + Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " + + Line); + assert(Matches.size() == 5); + StringRef FName = Matches[1]; + unsigned NumSamples, NumHeadSamples, NumSampledLines; + Matches[2].getAsInteger(10, NumSamples); + Matches[3].getAsInteger(10, NumHeadSamples); + Matches[4].getAsInteger(10, NumSampledLines); + FunctionProfile &FProfile = Profiles[FName]; + FProfile.TotalSamples += NumSamples; + FProfile.TotalHeadSamples += NumHeadSamples; + BodySampleMap &SampleMap = FProfile.BodySamples; + unsigned I; + for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) { + Line = Loader.readLine(); + if (!LineSample.match(Line, &Matches)) + Loader.reportParseError("Expected 'NUM: NUM', found " + Line); + assert(Matches.size() == 3); + unsigned LineOffset, NumSamples; + Matches[1].getAsInteger(10, LineOffset); + Matches[2].getAsInteger(10, NumSamples); + SampleMap[LineOffset] += NumSamples; + } + + if (I < NumSampledLines) + Loader.reportParseError("Unexpected end of file"); + } +} + +/// \brief Get the weight for an instruction. +/// +/// The "weight" of an instruction \p Inst is the number of samples +/// collected on that instruction at runtime. To retrieve it, we +/// need to compute the line number of \p Inst relative to the start of its +/// function. We use \p FirstLineno to compute the offset. We then +/// look up the samples collected for \p Inst using \p BodySamples. +/// +/// \param Inst Instruction to query. +/// \param FirstLineno Line number of the first instruction in the function. +/// \param BodySamples Map of relative source line locations to samples. 
+/// +/// \returns The profiled weight of I. +uint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno, + BodySampleMap &BodySamples) { + unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1; + return BodySamples.lookup(LOffset); +} + +/// \brief Compute the weight of a basic block. +/// +/// The weight of basic block \p B is the maximum weight of all the +/// instructions in B. +/// +/// \param B The basic block to query. +/// \param FirstLineno The line number for the first line in the +/// function holding B. +/// \param BodySamples The map containing all the samples collected in that +/// function. +/// +/// \returns The computed weight of B. +uint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno, + BodySampleMap &BodySamples) { + // If we've computed B's weight before, return it. + Function *F = B->getParent(); + FunctionProfile &FProfile = Profiles[F->getName()]; + std::pair Entry = + FProfile.BlockWeights.insert(std::make_pair(B, 0)); + if (!Entry.second) + return Entry.first->second; + + // Otherwise, compute and cache B's weight. + uint32_t Weight = 0; + for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples); + if (InstWeight > Weight) + Weight = InstWeight; + } + Entry.first->second = Weight; + return Weight; +} + +/// \brief Generate branch weight metadata for all branches in \p F. +/// +/// For every branch instruction B in \p F, we compute the weight of the +/// target block for each of the edges out of B. This is the weight +/// that we associate with that branch. +/// +/// TODO - This weight assignment will most likely be wrong if the +/// target branch has more than two predecessors. This needs to be done +/// using some form of flow propagation. +/// +/// Once all the branch weights are computed, we emit the MD_prof +/// metadata on B using the computed values. +/// +/// \param F The function to query. +bool SampleProfile::emitAnnotations(Function &F) { + bool Changed = false; + FunctionProfile &FProfile = Profiles[F.getName()]; + unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine(); + MDBuilder MDB(F.getContext()); + + // Clear the block weights cache. + FProfile.BlockWeights.clear(); + + // When we find a branch instruction: For each edge E out of the branch, + // the weight of E is the weight of the target block. 
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *B = I; + TerminatorInst *TI = B->getTerminator(); + if (TI->getNumSuccessors() == 1) + continue; + if (!isa(TI) && !isa(TI)) + continue; + + SmallVector Weights; + unsigned NSuccs = TI->getNumSuccessors(); + for (unsigned I = 0; I < NSuccs; ++I) { + BasicBlock *Succ = TI->getSuccessor(I); + uint32_t Weight = + computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples); + Weights.push_back(Weight); + } + + TI->setMetadata(llvm::LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + Changed = true; + } + + return Changed; +} + +char SampleProfileLoader::ID = 0; +INITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader", + false, false) + +bool SampleProfileLoader::runOnFunction(Function &F) { + return Profiler->emitAnnotations(F); +} + +bool SampleProfileLoader::doInitialization(Module &M) { + Profiler.reset(new SampleProfile(Filename)); + Profiler->loadText(); + return true; +} + +FunctionPass *llvm::createSampleProfileLoaderPass() { + return new SampleProfileLoader(SampleProfileFile); +} + +FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) { + return new SampleProfileLoader(Name); +} diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 0c3ffbc..72e00e1 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -28,6 +28,7 @@ using namespace llvm; /// ScalarOpts library. void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeADCEPass(Registry); + initializeSampleProfileLoaderPass(Registry); initializeCodeGenPreparePass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); -- cgit v1.1 From 4223b9601058369536caa1d15c9c19bc7c5a3706 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Wed, 13 Nov 2013 13:09:39 +0000 Subject: Fix -Wdelete-non-virtual-dtor warnings by making SampleProfile methods non-virtual git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SampleProfile.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index f549185..9bcd702 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -81,10 +81,10 @@ class SampleProfile { public: SampleProfile(StringRef F) : Profiles(0), Filename(F) {} - virtual void dump(); - virtual void loadText(); - virtual void loadNative() { llvm_unreachable("not implemented"); } - virtual bool emitAnnotations(Function &F); + void dump(); + void loadText(); + void loadNative() { llvm_unreachable("not implemented"); } + bool emitAnnotations(Function &F); void printFunctionProfile(raw_ostream &OS, StringRef FName); void dumpFunctionProfile(StringRef FName); -- cgit v1.1 From a305ffb65becc1031abbf85e70aec8fb3c337986 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Wed, 13 Nov 2013 20:09:11 +0000 Subject: Use StringRef instead of std::string git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194601 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 0b679fe..9955bfd 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ 
b/lib/Transforms/Utils/LoopUnroll.cpp @@ -90,7 +90,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // Move all definitions in the successor to the predecessor... OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); - std::string OldName = BB->getName(); + StringRef OldName = BB->getName(); // Erase basic block from the function... -- cgit v1.1 From 34432aeb6d42fbe3e327d1d339ea4156c99aa133 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Thu, 14 Nov 2013 12:29:04 +0000 Subject: [msan] Fast path optimization for wrap-indirect-calls feature of MemorySanitizer. Indirect call wrapping helps MSanDR (dynamic instrumentation companion tool for MSan) to catch all cases where execution leaves a compiler-instrumented module by allowing the tool to rewrite targets of indirect calls. This change is an optimization that skips wrapping for calls when the target is inside the current module. This relies on the linker providing symbols at the beginning and end of the module code (or code + data, does not really matter). The gold linker provides such symbols by default. The GNU (BFD) linker needs a link flag: -Wl,--defsym=__executable_start=0. More info: https://code.google.com/p/memory-sanitizer/wiki/MSanDR#Native_exec git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194697 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 77 ++++++++++++++++++---- 1 file changed, 65 insertions(+), 12 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 512b809..d547adc 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -190,6 +190,10 @@ static cl::opt<std::string> ClWrapIndirectCalls("msan-wrap-indirect-calls", cl::desc("Wrap indirect calls with a given function"), cl::Hidden); +static cl::opt<bool> ClWrapIndirectCallsFast("msan-wrap-indirect-calls-fast", + cl::desc("Do not wrap indirect calls with target in the same module"), + cl::Hidden, cl::init(true)); + namespace { /// \brief An instrumentation pass implementing detection of uninitialized ... @@ -240,6 +244,9 @@ class MemorySanitizer : public FunctionPass { /// function. GlobalVariable *OriginTLS; + GlobalVariable *MsandrModuleStart; + GlobalVariable *MsandrModuleEnd; + /// \brief The run-time callback to print a warning. Value *WarningFn; /// \brief Run-time helper that copies origin info for a memory range. @@ -375,6 +382,17 @@ void MemorySanitizer::initializeCallbacks(Module &M) { IndirectCallWrapperFn = M.getOrInsertFunction( ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL); } + + if (ClWrapIndirectCallsFast) { + MsandrModuleStart = new GlobalVariable( + M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, + 0, "__executable_start"); + MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility); + MsandrModuleEnd = new GlobalVariable( + M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, + 0, "_end"); + MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility); + } } /// \brief Module-level initialization.
@@ -489,6 +507,7 @@ struct MemorySanitizerVisitor : public InstVisitor { }; SmallVector InstrumentationList; SmallVector StoreList; + SmallVector IndirectCallList; MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { @@ -588,6 +607,48 @@ struct MemorySanitizerVisitor : public InstVisitor { DEBUG(dbgs() << "DONE:\n" << F); } + void materializeIndirectCalls() { + for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) { + CallSite CS = IndirectCallList[i]; + Instruction *I = CS.getInstruction(); + BasicBlock *B = I->getParent(); + IRBuilder<> IRB(I); + Value *Fn0 = CS.getCalledValue(); + Value *Fn = IRB.CreateBitCast(Fn0, MS.AnyFunctionPtrTy); + + if (ClWrapIndirectCallsFast) { + // Check that call target is inside this module limits. + Value *Start = + IRB.CreateBitCast(MS.MsandrModuleStart, MS.AnyFunctionPtrTy); + Value *End = IRB.CreateBitCast(MS.MsandrModuleEnd, MS.AnyFunctionPtrTy); + + Value *NotInThisModule = IRB.CreateOr(IRB.CreateICmpULT(Fn, Start), + IRB.CreateICmpUGE(Fn, End)); + + PHINode *NewFnPhi = + IRB.CreatePHI(Fn0->getType(), 2, "msandr.indirect_target"); + + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + cast(NotInThisModule), + /* Unreachable */ false, MS.ColdCallWeights); + + IRB.SetInsertPoint(CheckTerm); + // Slow path: call wrapper function to possibly transform the call + // target. + Value *NewFn = IRB.CreateBitCast( + IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType()); + + NewFnPhi->addIncoming(Fn0, B); + NewFnPhi->addIncoming(NewFn, dyn_cast(NewFn)->getParent()); + CS.setCalledFunction(NewFnPhi); + } else { + Value *NewFn = IRB.CreateBitCast( + IRB.CreateCall(MS.IndirectCallWrapperFn, Fn), Fn0->getType()); + CS.setCalledFunction(NewFn); + } + } + } + /// \brief Add MemorySanitizer instrumentation to a function. bool runOnFunction() { MS.initializeCallbacks(*F.getParent()); @@ -630,6 +691,9 @@ struct MemorySanitizerVisitor : public InstVisitor { // Insert shadow value checks. materializeChecks(); + // Wrap indirect calls. + materializeIndirectCalls(); + return true; } @@ -1809,17 +1873,6 @@ struct MemorySanitizerVisitor : public InstVisitor { } } - // Replace call to (*Fn) with a call to (*IndirectCallWrapperFn(Fn)). 
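Conceptually, the fast path materialized above is equivalent to the following C++ around each indirect call. The wrapper's symbol name is whatever was passed to -msan-wrap-indirect-calls, so the one used here is hypothetical:

    #include <cstdint>

    // Linker-provided symbols delimiting this module's image. Gold defines
    // __executable_start by default; BFD ld may need
    // -Wl,--defsym=__executable_start=0 (per the commit message above).
    extern "C" char __executable_start, _end;

    typedef void (*AnyFn)();

    // Hypothetical wrapper name; the real symbol is user-supplied.
    extern "C" AnyFn __msandr_wrap_indirect_call(AnyFn);

    static AnyFn resolveIndirectTarget(AnyFn Fn) {
      std::uintptr_t P = reinterpret_cast<std::uintptr_t>(Fn);
      // Fast path: a target inside the current module needs no rewriting.
      if (P >= reinterpret_cast<std::uintptr_t>(&__executable_start) &&
          P < reinterpret_cast<std::uintptr_t>(&_end))
        return Fn;
      // Slow path: let the wrapper possibly redirect the call target.
      return __msandr_wrap_indirect_call(Fn);
    }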
- void wrapIndirectCall(IRBuilder<> &IRB, CallSite CS) { - Value *Fn = CS.getCalledValue(); - Value *NewFn = IRB.CreateBitCast( - IRB.CreateCall(MS.IndirectCallWrapperFn, - IRB.CreateBitCast(Fn, MS.AnyFunctionPtrTy)), - Fn->getType()); - setShadow(NewFn, getShadow(Fn)); - CS.setCalledFunction(NewFn); - } - void visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite"); @@ -1860,7 +1913,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(&I); if (MS.WrapIndirectCalls && !CS.getCalledFunction()) - wrapIndirectCall(IRB, CS); + IndirectCallList.push_back(CS); unsigned ArgOffset = 0; DEBUG(dbgs() << " CallSite: " << I << "\n"); -- cgit v1.1 From 8cc5f7cd59c69250ab3b6a68e38405dcdb6a4b25 Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Thu, 14 Nov 2013 13:27:41 +0000 Subject: [asan] Poor man's coverage that works with ASan git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194701 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index de0a43b..77e9e0e 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -77,6 +77,7 @@ static const char *const kAsanUnregisterGlobalsName = static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *const kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *const kAsanMappingScaleName = "__asan_mapping_scale"; @@ -134,6 +135,8 @@ static cl::opt ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt ClCoverage("asan-coverage", + cl::desc("ASan coverage"), cl::Hidden, cl::init(false)); static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt ClMemIntrin("asan-memintrin", @@ -324,6 +327,7 @@ struct AddressSanitizer : public FunctionPass { bool LooksLikeCodeInBug11395(Instruction *I); void FindDynamicInitializers(Module &M); bool GlobalIsLinkerInitialized(GlobalVariable *G); + bool InjectCoverage(Function &F); bool CheckInitOrder; bool CheckUseAfterReturn; @@ -339,6 +343,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; + Function *AsanCovFunction; OwningPtr BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; @@ -1085,6 +1090,8 @@ void AddressSanitizer::initializeCallbacks(Module &M) { AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction( + kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL)); // We insert an empty inline asm after __asan_report* to avoid callback merge. 
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), @@ -1156,6 +1163,47 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } +// Poor man's coverage that works with ASan. +// We create a Guard boolean variable with the same linkage +// as the function and inject this code into the entry block: +// if (*Guard) { +// __sanitizer_cov(&F); +// *Guard = 1; +// } +// The accesses to Guard are atomic. The rest of the logic is +// in __sanitizer_cov (it's fine to call it more than once). +// +// This coverage implementation provides very limited data: +// it only tells if a given function was ever executed. +// No counters, no per-basic-block or per-edge data. +// But for many use cases this is what we need and the added slowdown +// is negligible. This simple implementation will probably be obsoleted +// by the upcoming Clang-based coverage implementation. +// By having it here and now we hope to +// a) get the functionality to users earlier and +// b) collect usage statistics to help improve Clang coverage design. +bool AddressSanitizer::InjectCoverage(Function &F) { + if (!ClCoverage) return false; + IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); + Type *Int8Ty = IRB.getInt8Ty(); + GlobalVariable *Guard = new GlobalVariable( + *F.getParent(), Int8Ty, false, F.getLinkage(), + Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName()); + LoadInst *Load = IRB.CreateLoad(Guard); + Load->setAtomic(Monotonic); + Load->setAlignment(1); + Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load); + Instruction *Ins = SplitBlockAndInsertIfThen(cast(Cmp), false); + IRB.SetInsertPoint(Ins); + // We pass &F to __sanitizer_cov. We could avoid this and rely on + // GET_CALLER_PC, but having the PC of the first instruction is just nice. + IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy)); + StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard); + Store->setAtomic(Monotonic); + Store->setAlignment(1); + return true; +} + bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; @@ -1251,6 +1299,10 @@ bool AddressSanitizer::runOnFunction(Function &F) { } bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); + + if (InjectCoverage(F)) + res = true; + DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n"); if (ClKeepUninstrumented) { -- cgit v1.1 From 2475da80edafcf3a45bddbc937e60312dba435ad Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 15 Nov 2013 03:28:22 +0000 Subject: Revert "[asan] Poor man's coverage that works with ASan" This reverts commit 194701. Apple's bootstrapped LTO builds have been failing, and this change (along with compiler-rt 194702-194704) is the only thing on the blamelist. I will either reappy these changes or help debug the problem, depending on whether this fixes the buildbots. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194780 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 52 ---------------------- 1 file changed, 52 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 77e9e0e..de0a43b 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -77,7 +77,6 @@ static const char *const kAsanUnregisterGlobalsName = static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init_v3"; -static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *const kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *const kAsanMappingScaleName = "__asan_mapping_scale"; @@ -135,8 +134,6 @@ static cl::opt ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); -static cl::opt ClCoverage("asan-coverage", - cl::desc("ASan coverage"), cl::Hidden, cl::init(false)); static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt ClMemIntrin("asan-memintrin", @@ -327,7 +324,6 @@ struct AddressSanitizer : public FunctionPass { bool LooksLikeCodeInBug11395(Instruction *I); void FindDynamicInitializers(Module &M); bool GlobalIsLinkerInitialized(GlobalVariable *G); - bool InjectCoverage(Function &F); bool CheckInitOrder; bool CheckUseAfterReturn; @@ -343,7 +339,6 @@ struct AddressSanitizer : public FunctionPass { Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; - Function *AsanCovFunction; OwningPtr BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; @@ -1090,8 +1085,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) { AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); - AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction( - kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), @@ -1163,47 +1156,6 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } -// Poor man's coverage that works with ASan. -// We create a Guard boolean variable with the same linkage -// as the function and inject this code into the entry block: -// if (*Guard) { -// __sanitizer_cov(&F); -// *Guard = 1; -// } -// The accesses to Guard are atomic. The rest of the logic is -// in __sanitizer_cov (it's fine to call it more than once). -// -// This coverage implementation provides very limited data: -// it only tells if a given function was ever executed. -// No counters, no per-basic-block or per-edge data. -// But for many use cases this is what we need and the added slowdown -// is negligible. This simple implementation will probably be obsoleted -// by the upcoming Clang-based coverage implementation. 
-// By having it here and now we hope to -// a) get the functionality to users earlier and -// b) collect usage statistics to help improve Clang coverage design. -bool AddressSanitizer::InjectCoverage(Function &F) { - if (!ClCoverage) return false; - IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); - Type *Int8Ty = IRB.getInt8Ty(); - GlobalVariable *Guard = new GlobalVariable( - *F.getParent(), Int8Ty, false, F.getLinkage(), - Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName()); - LoadInst *Load = IRB.CreateLoad(Guard); - Load->setAtomic(Monotonic); - Load->setAlignment(1); - Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load); - Instruction *Ins = SplitBlockAndInsertIfThen(cast(Cmp), false); - IRB.SetInsertPoint(Ins); - // We pass &F to __sanitizer_cov. We could avoid this and rely on - // GET_CALLER_PC, but having the PC of the first instruction is just nice. - IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy)); - StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard); - Store->setAtomic(Monotonic); - Store->setAlignment(1); - return true; -} - bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; @@ -1299,10 +1251,6 @@ bool AddressSanitizer::runOnFunction(Function &F) { } bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty(); - - if (InjectCoverage(F)) - res = true; - DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n"); if (ClKeepUninstrumented) { -- cgit v1.1 From 6dd44d3b7f33c9984dfb40461630d50c4fed1234 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 15 Nov 2013 05:45:08 +0000 Subject: Add instcombine visitor for addrspacecast git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194786 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombine.h | 1 + lib/Transforms/InstCombine/InstCombineCasts.cpp | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index ee95106..a5eddc2 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -178,6 +178,7 @@ public: Instruction *visitPtrToInt(PtrToIntInst &CI); Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); + Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 01894cb..a1aedd4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1851,3 +1851,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } + +Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + return commonCastTransforms(CI); +} -- cgit v1.1 From 4b8991424a8967dfdafc1768a9748f67e6c8b36f Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 15 Nov 2013 07:16:09 +0000 Subject: Reapply "[asan] Poor man's coverage that works with ASan" I was able to successfully run a bootstrapped LTO build of clang with r194701, so this change does not seem to be the cause of our failing buildbots. 
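
For readers skimming the patch series, the mechanism being reverted and
reapplied here reduces to a small guarded callback at each function entry.
The following C++-style sketch is editorial (not text from any commit);
guard_for_F stands for the per-function __asan_gen_cov_* byte, and the
relaxed atomics stand in for the monotonic atomic load/store the pass emits:

  // One zero-initialized guard byte is created per instrumented function.
  static std::atomic<char> guard_for_F;  // models __asan_gen_cov_F

  void F() {
    // Entry-block instrumentation injected by InjectCoverage():
    if (guard_for_F.load(std::memory_order_relaxed) == 0) {
      __sanitizer_cov(&F);  // runtime records that F was executed
      guard_for_F.store(1, std::memory_order_relaxed);
    }
    // ... original body of F ...
  }

Because __sanitizer_cov is safe to call more than once, the unsynchronized
check-then-set race between threads is acceptable by design.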
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194789 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Instrumentation/AddressSanitizer.cpp | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index de0a43b..77e9e0e 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -77,6 +77,7 @@ static const char *const kAsanUnregisterGlobalsName = static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *const kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *const kAsanMappingScaleName = "__asan_mapping_scale"; @@ -134,6 +135,8 @@ static cl::opt ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt ClCoverage("asan-coverage", + cl::desc("ASan coverage"), cl::Hidden, cl::init(false)); static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt ClMemIntrin("asan-memintrin", @@ -324,6 +327,7 @@ struct AddressSanitizer : public FunctionPass { bool LooksLikeCodeInBug11395(Instruction *I); void FindDynamicInitializers(Module &M); bool GlobalIsLinkerInitialized(GlobalVariable *G); + bool InjectCoverage(Function &F); bool CheckInitOrder; bool CheckUseAfterReturn; @@ -339,6 +343,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; + Function *AsanCovFunction; OwningPtr BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; @@ -1085,6 +1090,8 @@ void AddressSanitizer::initializeCallbacks(Module &M) { AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); + AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction( + kAsanCovName, IRB.getVoidTy(), IntptrTy, NULL)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), @@ -1156,6 +1163,47 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { return false; } +// Poor man's coverage that works with ASan. +// We create a Guard boolean variable with the same linkage +// as the function and inject this code into the entry block: +// if (*Guard) { +// __sanitizer_cov(&F); +// *Guard = 1; +// } +// The accesses to Guard are atomic. The rest of the logic is +// in __sanitizer_cov (it's fine to call it more than once). +// +// This coverage implementation provides very limited data: +// it only tells if a given function was ever executed. +// No counters, no per-basic-block or per-edge data. +// But for many use cases this is what we need and the added slowdown +// is negligible. This simple implementation will probably be obsoleted +// by the upcoming Clang-based coverage implementation. 
+// By having it here and now we hope to
+// a) get the functionality to users earlier and
+// b) collect usage statistics to help improve Clang coverage design.
+bool AddressSanitizer::InjectCoverage(Function &F) {
+  if (!ClCoverage) return false;
+  IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
+  Type *Int8Ty = IRB.getInt8Ty();
+  GlobalVariable *Guard = new GlobalVariable(
+      *F.getParent(), Int8Ty, false, F.getLinkage(),
+      Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
+  LoadInst *Load = IRB.CreateLoad(Guard);
+  Load->setAtomic(Monotonic);
+  Load->setAlignment(1);
+  Value *Cmp = IRB.CreateICmpEQ(Constant::getNullValue(Int8Ty), Load);
+  Instruction *Ins = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
+  IRB.SetInsertPoint(Ins);
+  // We pass &F to __sanitizer_cov. We could avoid this and rely on
+  // GET_CALLER_PC, but having the PC of the first instruction is just nice.
+  IRB.CreateCall(AsanCovFunction, IRB.CreatePointerCast(&F, IntptrTy));
+  StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
+  Store->setAtomic(Monotonic);
+  Store->setAlignment(1);
+  return true;
+}
+
 bool AddressSanitizer::runOnFunction(Function &F) {
   if (BL->isIn(F)) return false;
   if (&F == AsanCtorFunction) return false;
@@ -1251,6 +1299,10 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   }
   bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+
+  if (InjectCoverage(F))
+    res = true;
+
   DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
 
   if (ClKeepUninstrumented) {
-- 
cgit v1.1


From 8f15c6822251bbe7eb21732c46aa6d9f30ba8836 Mon Sep 17 00:00:00 2001
From: Kostya Serebryany
Date: Fri, 15 Nov 2013 09:52:05 +0000
Subject: [asan] use GlobalValue::PrivateLinkage for coverage guard to save
 quite a bit of code size

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194800 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 77e9e0e..d8c3f8e 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1187,7 +1187,7 @@ bool AddressSanitizer::InjectCoverage(Function &F) {
   IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt());
   Type *Int8Ty = IRB.getInt8Ty();
   GlobalVariable *Guard = new GlobalVariable(
-      *F.getParent(), Int8Ty, false, F.getLinkage(),
+      *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
       Constant::getNullValue(Int8Ty), "__asan_gen_cov_" + F.getName());
   LoadInst *Load = IRB.CreateLoad(Guard);
   Load->setAtomic(Monotonic);
-- 
cgit v1.1


From c160efc28b815dbea73f0243f0729c1f0e1fabdb Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Fri, 15 Nov 2013 20:41:15 +0000
Subject: ArgumentPromotion: correctly transfer TBAA tags and alignments.

We used to use std::map<IndicesVector, LoadInst*> for OriginalLoads, and when
we try to promote two arguments, they will both write to OriginalLoads causing
created loads for the two arguments to have the same original load. And the
same tbaa tag and alignment will be put to the created loads for the two
arguments.

The fix is to use std::map<std::pair<Argument*, IndicesVector>, LoadInst*>
for OriginalLoads, so each Argument will write to different parts of the map.
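
To make the fix concrete, the two map shapes look like this (an editorial
sketch, not code from the patch; the names mirror those in DoPromotion):

  // Before: keyed only by the access path, so two promoted arguments with
  // identical index vectors collide on one representative load.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // After: keyed by (argument, access path), so each argument keeps its own
  // representative load, and with it the correct TBAA tag and alignment.
  std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;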
PR17906

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194846 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/ArgumentPromotion.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65c57e1..df08091 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -504,7 +504,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // OriginalLoads - Keep track of a representative load instruction from the
   // original function so that we can tell the alias analysis implementation
   // what the new GEP/Load instructions we are inserting look like.
-  std::map<IndicesVector, LoadInst*> OriginalLoads;
+  // We need to keep the original loads for each argument and the elements
+  // of the argument that are accessed.
+  std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
 
   // Attribute - Keep track of the parameter attributes for the arguments
   // that we are *not* promoting. For the ones that we do promote, the parameter
@@ -569,7 +571,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       else
         // Take any load, we will use it only to update Alias Analysis
         OrigLoad = cast<LoadInst>(User->use_back());
-      OriginalLoads[Indices] = OrigLoad;
+      OriginalLoads[std::make_pair(I, Indices)] = OrigLoad;
     }
 
     // Add a parameter to the function for each element passed in.
@@ -676,7 +678,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
         for (ScalarizeTable::iterator SI = ArgIndices.begin(),
                E = ArgIndices.end(); SI != E; ++SI) {
           Value *V = *AI;
-          LoadInst *OrigLoad = OriginalLoads[*SI];
+          LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
           if (!SI->empty()) {
             Ops.reserve(SI->size());
             Type *ElTy = V->getType();
-- 
cgit v1.1


From 4634338655449a6e76b6a948c91fbe5bc736d24b Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Fri, 15 Nov 2013 23:09:33 +0000
Subject: LoopVectorizer: Use abi alignment for accesses with no alignment

When we vectorize a scalar access with no alignment specified, we have to set
the target's abi alignment of the scalar access on the vectorized access.
Using the same alignment of zero would be wrong because most targets will
have a bigger abi alignment for vector types.

This probably fixes PR17878.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194876 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index bc649b3..e624bb4 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1205,6 +1205,10 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
   Type *DataTy = VectorType::get(ScalarDataTy, VF);
   Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
   unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+  // An alignment of 0 means target abi alignment. We need to use the scalar's
+  // target abi alignment in such a case.
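+  // (Editorial illustration, not part of the original commit: on x86-64 a
+  // scalar load of a double at alignment 0 implies ABI alignment 8, but an
+  // alignment-0 load of the vectorized <4 x double> type would imply
+  // alignment 32, which the original scalar code never guaranteed.)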
+ if (!Alignment) + Alignment = DL->getABITypeAlignment(ScalarDataTy); unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; -- cgit v1.1 From e9cdbf68e542bbb79597d6233dd2a339c89862a2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 16 Nov 2013 16:00:48 +0000 Subject: InstCombine: fold (A >> C) == (B >> C) --> (A^B) < (1 << C) for constant Cs. This is common in bitfield code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineCompares.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index e624572..226126b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2948,6 +2948,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Builder->CreateTrunc(B, A->getType())); } + // (A >> C) == (B >> C) --> (A^B) u< (1 << C) + // For lshr and ashr pairs. + if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) || + (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) && + match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) { + unsigned TypeBits = Cst1->getBitWidth(); + unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); + if (ShAmt < TypeBits && ShAmt != 0) { + ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE + ? ICmpInst::ICMP_UGE + : ICmpInst::ICMP_ULT; + Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); + return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal)); + } + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; -- cgit v1.1 From 64fa501b1081b5d5c25e5e9639075abb0cb724d9 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 16 Nov 2013 21:29:08 +0000 Subject: Apply the InstCombine fptrunc sqrt optimization to llvm.sqrt InstCombine, in visitFPTrunc, applies the following optimization to sqrt calls: (fptrunc (sqrt (fpext x))) -> (sqrtf x) but does not apply the same optimization to llvm.sqrt. This is a problem because, to enable vectorization, Clang generates llvm.sqrt instead of sqrt in fast-math mode, and because this optimization is being applied to sqrt and not applied to llvm.sqrt, sometimes the fast-math code is slower. This change makes InstCombine apply this optimization to llvm.sqrt as well. This fixes the specific problem in PR17758, although the same underlying issue (optimizations applied to libcalls are not applied to intrinsics) exists for other optimizations in SimplifyLibCalls. 
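
A source-level illustration (editorial, not from the commit): for

  float g(float x) {
    return sqrt(x);  // C arithmetic promotes to (float)sqrt((double)x)
  }

the frontend produces an fptrunc of a double-precision sqrt of an fpext in
IR. The existing fold already shrank the sqrt() libcall form to sqrtf();
this patch makes the equivalent @llvm.sqrt.f64 form, which Clang emits in
fast-math mode, shrink to @llvm.sqrt.f32 in the same way.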
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194935 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineCasts.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a1aedd4..72377dc 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1262,9 +1262,14 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   }
 
   // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+  // Note that we restrict this transformation based on
+  // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because
+  // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the
+  // single-precision intrinsic can be expanded in the backend.
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
-      Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+      (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) ||
+       Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) &&
       Call->getNumArgOperands() == 1 && Call->hasOneUse()) {
     CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1275,11 +1280,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
         Arg->getOperand(0)->getType()->isFloatTy()) {
       Function *Callee = Call->getCalledFunction();
       Module *M = CI.getParent()->getParent()->getParent();
-      Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
-                                                   Callee->getAttributes(),
-                                                   Builder->getFloatTy(),
-                                                   Builder->getFloatTy(),
-                                                   NULL);
+      Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ?
+        Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) :
+        M->getOrInsertFunction("sqrtf", Callee->getAttributes(),
+                               Builder->getFloatTy(), Builder->getFloatTy(),
+                               NULL);
       CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
                                        "sqrtfcall");
       ret->setAttributes(Callee->getAttributes());
-- 
cgit v1.1


From bebe48dbfe00078329341945bfb11f778ace6d12 Mon Sep 17 00:00:00 2001
From: Hal Finkel
Date: Sat, 16 Nov 2013 23:59:05 +0000
Subject: Add a loop rerolling pass

This adds a loop rerolling pass: the opposite of (partial) loop unrolling.
The transformation aims to take loops like this:

  for (int i = 0; i < 3200; i += 5) {
    a[i]     += alpha * b[i];
    a[i + 1] += alpha * b[i + 1];
    a[i + 2] += alpha * b[i + 2];
    a[i + 3] += alpha * b[i + 3];
    a[i + 4] += alpha * b[i + 4];
  }

and turn them into this:

  for (int i = 0; i < 3200; ++i) {
    a[i] += alpha * b[i];
  }

and loops like this:

  for (int i = 0; i < 500; ++i) {
    x[3*i]   = foo(0);
    x[3*i+1] = foo(0);
    x[3*i+2] = foo(0);
  }

and turn them into this:

  for (int i = 0; i < 1500; ++i) {
    x[i] = foo(0);
  }

There are two motivations for this transformation:

  1. Code-size reduction (especially relevant, obviously, when compiling for
     code size).

  2. Providing greater choice to the loop vectorizer (and generic unroller) to
     choose the unrolling factor (and a better ability to vectorize). The loop
     vectorizer can take vector lengths and register pressure into account
     when choosing an unrolling factor, for example, and a pre-unrolled loop
     limits that choice. This is especially problematic if the manual
     unrolling was optimized for a machine different from the current target.

The current implementation is limited to single basic-block loops only.
The rerolling recognition should work regardless of how the loop iterations are intermixed within the loop body (subject to dependency and side-effect constraints), but the significant restriction is that the order of the instructions in each iteration must be identical. This seems sufficient to capture all current use cases. This pass is not currently enabled by default at any optimization level. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194939 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 6 + lib/Transforms/Scalar/CMakeLists.txt | 1 + lib/Transforms/Scalar/LoopRerollPass.cpp | 1184 +++++++++++++++++++++++++++++ lib/Transforms/Scalar/Scalar.cpp | 5 + 4 files changed, 1196 insertions(+) create mode 100644 lib/Transforms/Scalar/LoopRerollPass.cpp (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 1386201..5399e68 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -54,6 +54,10 @@ static cl::opt UseNewSROA("use-new-sroa", cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); +static cl::opt +RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -216,6 +220,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); + if (RunLoopRerolling) + MPM.add(createLoopRerollPass()); if (SLPVectorize) MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index ee45600..626c810 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMScalarOpts LoopInstSimplify.cpp LoopRotation.cpp LoopStrengthReduce.cpp + LoopRerollPass.cpp LoopUnrollPass.cpp LoopUnswitch.cpp LowerAtomic.cpp diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp new file mode 100644 index 0000000..eb39cd0 --- /dev/null +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -0,0 +1,1184 @@ +//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a simple loop reroller. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-reroll" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +STATISTIC(NumRerolledLoops, "Number of rerolled loops"); + +static cl::opt +MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, + cl::desc("The maximum increment for loop rerolling")); + +// This loop re-rolling transformation aims to transform loops like this: +// +// int foo(int a); +// void bar(int *x) { +// for (int i = 0; i < 500; i += 3) { +// foo(i); +// foo(i+1); +// foo(i+2); +// } +// } +// +// into a loop like this: +// +// void bar(int *x) { +// for (int i = 0; i < 500; ++i) +// foo(i); +// } +// +// It does this by looking for loops that, besides the latch code, are composed +// of isomorphic DAGs of instructions, with each DAG rooted at some increment +// to the induction variable, and where each DAG is isomorphic to the DAG +// rooted at the induction variable (excepting the sub-DAGs which root the +// other induction-variable increments). In other words, we're looking for loop +// bodies of the form: +// +// %iv = phi [ (preheader, ...), (body, %iv.next) ] +// f(%iv) +// %iv.1 = add %iv, 1 <-- a root increment +// f(%iv.1) +// %iv.2 = add %iv, 2 <-- a root increment +// f(%iv.2) +// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment +// f(%iv.scale_m_1) +// ... +// %iv.next = add %iv, scale +// %cmp = icmp(%iv, ...) +// br %cmp, header, exit +// +// where each f(i) is a set of instructions that, collectively, are a function +// only of i (and other loop-invariant values). +// +// As a special case, we can also reroll loops like this: +// +// int foo(int); +// void bar(int *x) { +// for (int i = 0; i < 500; ++i) { +// x[3*i] = foo(0); +// x[3*i+1] = foo(0); +// x[3*i+2] = foo(0); +// } +// } +// +// into this: +// +// void bar(int *x) { +// for (int i = 0; i < 1500; ++i) +// x[i] = foo(0); +// } +// +// in which case, we're looking for inputs like this: +// +// %iv = phi [ (preheader, ...), (body, %iv.next) ] +// %scaled.iv = mul %iv, scale +// f(%scaled.iv) +// %scaled.iv.1 = add %scaled.iv, 1 +// f(%scaled.iv.1) +// %scaled.iv.2 = add %scaled.iv, 2 +// f(%scaled.iv.2) +// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 +// f(%scaled.iv.scale_m_1) +// ... +// %iv.next = add %iv, 1 +// %cmp = icmp(%iv, ...) 
+// br %cmp, header, exit + +namespace { + class LoopReroll : public LoopPass { + public: + static char ID; // Pass ID, replacement for typeid + LoopReroll() : LoopPass(ID) { + initializeLoopRerollPass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + } + +protected: + AliasAnalysis *AA; + LoopInfo *LI; + ScalarEvolution *SE; + DataLayout *DL; + TargetLibraryInfo *TLI; + DominatorTree *DT; + + typedef SmallVector SmallInstructionVector; + typedef SmallSet SmallInstructionSet; + + // A chain of isomorphic instructions, indentified by a single-use PHI, + // representing a reduction. Only the last value may be used outside the + // loop. + struct SimpleLoopReduction { + SimpleLoopReduction(Instruction *P, Loop *L) + : Valid(false), Instructions(1, P) { + assert(isa(P) && "First reduction instruction must be a PHI"); + add(L); + } + + bool valid() const { + return Valid; + } + + Instruction *getPHI() const { + assert(Valid && "Using invalid reduction"); + return Instructions.front(); + } + + Instruction *getReducedValue() const { + assert(Valid && "Using invalid reduction"); + return Instructions.back(); + } + + Instruction *get(size_t i) const { + assert(Valid && "Using invalid reduction"); + return Instructions[i+1]; + } + + Instruction *operator [] (size_t i) const { return get(i); } + + // The size, ignoring the initial PHI. + size_t size() const { + assert(Valid && "Using invalid reduction"); + return Instructions.size()-1; + } + + typedef SmallInstructionVector::iterator iterator; + typedef SmallInstructionVector::const_iterator const_iterator; + + iterator begin() { + assert(Valid && "Using invalid reduction"); + return llvm::next(Instructions.begin()); + } + + const_iterator begin() const { + assert(Valid && "Using invalid reduction"); + return llvm::next(Instructions.begin()); + } + + iterator end() { return Instructions.end(); } + const_iterator end() const { return Instructions.end(); } + + protected: + bool Valid; + SmallInstructionVector Instructions; + + void add(Loop *L); + }; + + // The set of all reductions, and state tracking of possible reductions + // during loop instruction processing. + struct ReductionTracker { + typedef SmallVector SmallReductionVector; + + // Add a new possible reduction. + void addSLR(SimpleLoopReduction &SLR) { + PossibleReds.push_back(SLR); + } + + // Setup to track possible reductions corresponding to the provided + // rerolling scale. Only reductions with a number of non-PHI instructions + // that is divisible by the scale are considered. Three instructions sets + // are filled in: + // - A set of all possible instructions in eligible reductions. + // - A set of all PHIs in eligible reductions + // - A set of all reduced values (last instructions) in eligible reductions. 
+ void restrictToScale(uint64_t Scale, + SmallInstructionSet &PossibleRedSet, + SmallInstructionSet &PossibleRedPHISet, + SmallInstructionSet &PossibleRedLastSet) { + PossibleRedIdx.clear(); + PossibleRedIter.clear(); + Reds.clear(); + + for (unsigned i = 0, e = PossibleReds.size(); i != e; ++i) + if (PossibleReds[i].size() % Scale == 0) { + PossibleRedLastSet.insert(PossibleReds[i].getReducedValue()); + PossibleRedPHISet.insert(PossibleReds[i].getPHI()); + + PossibleRedSet.insert(PossibleReds[i].getPHI()); + PossibleRedIdx[PossibleReds[i].getPHI()] = i; + for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), + JE = PossibleReds[i].end(); J != JE; ++J) { + PossibleRedSet.insert(*J); + PossibleRedIdx[*J] = i; + } + } + } + + // The functions below are used while processing the loop instructions. + + // Are the two instructions both from reductions, and furthermore, from + // the same reduction? + bool isPairInSame(Instruction *J1, Instruction *J2) { + DenseMap::iterator J1I = PossibleRedIdx.find(J1); + if (J1I != PossibleRedIdx.end()) { + DenseMap::iterator J2I = PossibleRedIdx.find(J2); + if (J2I != PossibleRedIdx.end() && J1I->second == J2I->second) + return true; + } + + return false; + } + + // The two provided instructions, the first from the base iteration, and + // the second from iteration i, form a matched pair. If these are part of + // a reduction, record that fact. + void recordPair(Instruction *J1, Instruction *J2, unsigned i) { + if (PossibleRedIdx.count(J1)) { + assert(PossibleRedIdx.count(J2) && + "Recording reduction vs. non-reduction instruction?"); + + PossibleRedIter[J1] = 0; + PossibleRedIter[J2] = i; + + int Idx = PossibleRedIdx[J1]; + assert(Idx == PossibleRedIdx[J2] && + "Recording pair from different reductions?"); + Reds.insert(PossibleRedIdx[J1]); + } + } + + // The functions below can be called after we've finished processing all + // instructions in the loop, and we know which reductions were selected. + + // Is the provided instruction the PHI of a reduction selected for + // rerolling? + bool isSelectedPHI(Instruction *J) { + if (!isa(J)) + return false; + + for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); + RI != RIE; ++RI) { + int i = *RI; + if (cast(J) == PossibleReds[i].getPHI()) + return true; + } + + return false; + } + + bool validateSelected(); + void replaceSelected(); + + protected: + // The vector of all possible reductions (for any scale). 
+ SmallReductionVector PossibleReds; + + DenseMap PossibleRedIdx; + DenseMap PossibleRedIter; + DenseSet Reds; + }; + + void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); + void collectPossibleReductions(Loop *L, + ReductionTracker &Reductions); + void collectInLoopUserSet(Loop *L, + const SmallInstructionVector &Roots, + const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users); + void collectInLoopUserSet(Loop *L, + Instruction * Root, + const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users); + bool findScaleFromMul(Instruction *RealIV, uint64_t &Scale, + Instruction *&IV, + SmallInstructionVector &LoopIncs); + bool collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, Instruction *IV, + SmallVector &Roots, + SmallInstructionSet &AllRoots, + SmallInstructionVector &LoopIncs); + bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, + ReductionTracker &Reductions); + }; +} + +char LoopReroll::ID = 0; +INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false) + +Pass *llvm::createLoopRerollPass() { + return new LoopReroll; +} + +// Returns true if the provided instruction is used outside the given loop. +// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in +// non-loop blocks to be outside the loop. +static bool hasUsesOutsideLoop(Instruction *I, Loop *L) { + for (Value::use_iterator UI = I->use_begin(), + UIE = I->use_end(); UI != UIE; ++UI) { + Instruction *User = cast(*UI); + if (!L->contains(User)) + return true; + } + + return false; +} + +// Collect the list of loop induction variables with respect to which it might +// be possible to reroll the loop. +void LoopReroll::collectPossibleIVs(Loop *L, + SmallInstructionVector &PossibleIVs) { + BasicBlock *Header = L->getHeader(); + for (BasicBlock::iterator I = Header->begin(), + IE = Header->getFirstInsertionPt(); I != IE; ++I) { + if (!isa(I)) + continue; + if (!I->getType()->isIntegerTy()) + continue; + + if (const SCEVAddRecExpr *PHISCEV = + dyn_cast(SE->getSCEV(I))) { + if (PHISCEV->getLoop() != L) + continue; + if (!PHISCEV->isAffine()) + continue; + if (const SCEVConstant *IncSCEV = + dyn_cast(PHISCEV->getStepRecurrence(*SE))) { + if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) + continue; + if (IncSCEV->getValue()->uge(MaxInc)) + continue; + + DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << + *PHISCEV << "\n"); + PossibleIVs.push_back(I); + } + } + } +} + +// Add the remainder of the reduction-variable chain to the instruction vector +// (the initial PHINode has already been added). If successful, the object is +// marked as valid. +void LoopReroll::SimpleLoopReduction::add(Loop *L) { + assert(!Valid && "Cannot add to an already-valid chain"); + + // The reduction variable must be a chain of single-use instructions + // (including the PHI), except for the last value (which is used by the PHI + // and also outside the loop). 
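+  // (Editorial illustration, not part of the original commit: a qualifying
+  // sum-reduction chain has the shape
+  //   %s   = phi [ 0, preheader ], [ %s.2, body ]
+  //   %s.1 = fadd %s, %a     ; the only use of %s
+  //   %s.2 = fadd %s.1, %b   ; the only use of %s.1; %s.2 feeds the PHI
+  //                          ; and any use outside the loop
+  // so every link is single-use except the final reduced value.)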
+ Instruction *C = Instructions.front(); + + do { + C = cast(*C->use_begin()); + if (C->hasOneUse()) { + if (!C->isBinaryOp()) + return; + + if (!(isa(Instructions.back()) || + C->isSameOperationAs(Instructions.back()))) + return; + + Instructions.push_back(C); + } + } while (C->hasOneUse()); + + if (Instructions.size() < 2 || + !C->isSameOperationAs(Instructions.back()) || + C->use_begin() == C->use_end()) + return; + + // C is now the (potential) last instruction in the reduction chain. + for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end(); + UI != UIE; ++UI) { + // The only in-loop user can be the initial PHI. + if (L->contains(cast(*UI))) + if (cast(*UI ) != Instructions.front()) + return; + } + + Instructions.push_back(C); + Valid = true; +} + +// Collect the vector of possible reduction variables. +void LoopReroll::collectPossibleReductions(Loop *L, + ReductionTracker &Reductions) { + BasicBlock *Header = L->getHeader(); + for (BasicBlock::iterator I = Header->begin(), + IE = Header->getFirstInsertionPt(); I != IE; ++I) { + if (!isa(I)) + continue; + if (!I->getType()->isSingleValueType()) + continue; + + SimpleLoopReduction SLR(I, L); + if (!SLR.valid()) + continue; + + DEBUG(dbgs() << "LRR: Possible reduction: " << *I << " (with " << + SLR.size() << " chained instructions)\n"); + Reductions.addSLR(SLR); + } +} + +// Collect the set of all users of the provided root instruction. This set of +// users contains not only the direct users of the root instruction, but also +// all users of those users, and so on. There are two exceptions: +// +// 1. Instructions in the set of excluded instructions are never added to the +// use set (even if they are users). This is used, for example, to exclude +// including root increments in the use set of the primary IV. +// +// 2. Instructions in the set of final instructions are added to the use set +// if they are users, but their users are not added. This is used, for +// example, to prevent a reduction update from forcing all later reduction +// updates into the use set. +void LoopReroll::collectInLoopUserSet(Loop *L, + Instruction *Root, const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users) { + SmallInstructionVector Queue(1, Root); + while (!Queue.empty()) { + Instruction *I = Queue.pop_back_val(); + if (!Users.insert(I).second) + continue; + + if (!Final.count(I)) + for (Value::use_iterator UI = I->use_begin(), + UIE = I->use_end(); UI != UIE; ++UI) { + Instruction *User = cast(*UI); + if (PHINode *PN = dyn_cast(User)) { + // Ignore "wrap-around" uses to PHIs of this loop's header. + if (PN->getIncomingBlock(UI) == L->getHeader()) + continue; + } + + if (L->contains(User) && !Exclude.count(User)) { + Queue.push_back(User); + } + } + + // We also want to collect single-user "feeder" values. + for (User::op_iterator OI = I->op_begin(), + OIE = I->op_end(); OI != OIE; ++OI) { + if (Instruction *Op = dyn_cast(*OI)) + if (Op->hasOneUse() && L->contains(Op) && !Exclude.count(Op) && + !Final.count(Op)) + Queue.push_back(Op); + } + } +} + +// Collect all of the users of all of the provided root instructions (combined +// into a single set). 
+void LoopReroll::collectInLoopUserSet(Loop *L, + const SmallInstructionVector &Roots, + const SmallInstructionSet &Exclude, + const SmallInstructionSet &Final, + DenseSet &Users) { + for (SmallInstructionVector::const_iterator I = Roots.begin(), + IE = Roots.end(); I != IE; ++I) + collectInLoopUserSet(L, *I, Exclude, Final, Users); +} + +static bool isSimpleLoadStore(Instruction *I) { + if (LoadInst *LI = dyn_cast(I)) + return LI->isSimple(); + if (StoreInst *SI = dyn_cast(I)) + return SI->isSimple(); + if (MemIntrinsic *MI = dyn_cast(I)) + return !MI->isVolatile(); + return false; +} + +// Recognize loops that are setup like this: +// +// %iv = phi [ (preheader, ...), (body, %iv.next) ] +// %scaled.iv = mul %iv, scale +// f(%scaled.iv) +// %scaled.iv.1 = add %scaled.iv, 1 +// f(%scaled.iv.1) +// %scaled.iv.2 = add %scaled.iv, 2 +// f(%scaled.iv.2) +// %scaled.iv.scale_m_1 = add %scaled.iv, scale-1 +// f(%scaled.iv.scale_m_1) +// ... +// %iv.next = add %iv, 1 +// %cmp = icmp(%iv, ...) +// br %cmp, header, exit +// +// and, if found, set IV = %scaled.iv, and add %iv.next to LoopIncs. +bool LoopReroll::findScaleFromMul(Instruction *RealIV, uint64_t &Scale, + Instruction *&IV, + SmallInstructionVector &LoopIncs) { + // This is a special case: here we're looking for all uses (except for + // the increment) to be multiplied by a common factor. The increment must + // be by one. This is to capture loops like: + // for (int i = 0; i < 500; ++i) { + // foo(3*i); foo(3*i+1); foo(3*i+2); + // } + if (RealIV->getNumUses() != 2) + return false; + const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(RealIV)); + Instruction *User1 = cast(*RealIV->use_begin()), + *User2 = cast(*llvm::next(RealIV->use_begin())); + if (!SE->isSCEVable(User1->getType()) || !SE->isSCEVable(User2->getType())) + return false; + const SCEVAddRecExpr *User1SCEV = + dyn_cast(SE->getSCEV(User1)), + *User2SCEV = + dyn_cast(SE->getSCEV(User2)); + if (!User1SCEV || !User1SCEV->isAffine() || + !User2SCEV || !User2SCEV->isAffine()) + return false; + + // We assume below that User1 is the scale multiply and User2 is the + // increment. If this can't be true, then swap them. + if (User1SCEV == RealIVSCEV->getPostIncExpr(*SE)) { + std::swap(User1, User2); + std::swap(User1SCEV, User2SCEV); + } + + if (User2SCEV != RealIVSCEV->getPostIncExpr(*SE)) + return false; + assert(User2SCEV->getStepRecurrence(*SE)->isOne() && + "Invalid non-unit step for multiplicative scaling"); + LoopIncs.push_back(User2); + + if (const SCEVConstant *MulScale = + dyn_cast(User1SCEV->getStepRecurrence(*SE))) { + // Make sure that both the start and step have the same multiplier. + if (RealIVSCEV->getStart()->getType() != MulScale->getType()) + return false; + if (SE->getMulExpr(RealIVSCEV->getStart(), MulScale) != + User1SCEV->getStart()) + return false; + + ConstantInt *MulScaleCI = MulScale->getValue(); + if (!MulScaleCI->uge(2) || MulScaleCI->uge(MaxInc)) + return false; + Scale = MulScaleCI->getZExtValue(); + IV = User1; + } else + return false; + + DEBUG(dbgs() << "LRR: Found possible scaling " << *User1 << "\n"); + return true; +} + +// Collect all root increments with respect to the provided induction variable +// (normally the PHI, but sometimes a multiply). A root increment is an +// instruction, normally an add, with a positive constant less than Scale. In a +// rerollable loop, each of these increments is the root of an instruction +// graph isomorphic to the others. 
Also, we collect the final induction +// increment (the increment equal to the Scale), and its users in LoopIncs. +bool LoopReroll::collectAllRoots(Loop *L, uint64_t Inc, uint64_t Scale, + Instruction *IV, + SmallVector &Roots, + SmallInstructionSet &AllRoots, + SmallInstructionVector &LoopIncs) { + for (Value::use_iterator UI = IV->use_begin(), + UIE = IV->use_end(); UI != UIE; ++UI) { + Instruction *User = cast(*UI); + if (!SE->isSCEVable(User->getType())) + continue; + if (User->getType() != IV->getType()) + continue; + if (!L->contains(User)) + continue; + if (hasUsesOutsideLoop(User, L)) + continue; + + if (const SCEVConstant *Diff = dyn_cast(SE->getMinusSCEV( + SE->getSCEV(User), SE->getSCEV(IV)))) { + uint64_t Idx = Diff->getValue()->getValue().getZExtValue(); + if (Idx > 0 && Idx < Scale) { + Roots[Idx-1].push_back(User); + AllRoots.insert(User); + } else if (Idx == Scale && Inc > 1) { + LoopIncs.push_back(User); + } + } + } + + if (Roots[0].empty()) + return false; + bool AllSame = true; + for (unsigned i = 1; i < Scale-1; ++i) + if (Roots[i].size() != Roots[0].size()) { + AllSame = false; + break; + } + + if (!AllSame) + return false; + + return true; +} + +// Validate the selected reductions. All iterations must have an isomorphic +// part of the reduction chain and, for non-associative reductions, the chain +// entries must appear in order. +bool LoopReroll::ReductionTracker::validateSelected() { + // For a non-associative reduction, the chain entries must appear in order. + for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); + RI != RIE; ++RI) { + int i = *RI; + int PrevIter = 0, BaseCount = 0, Count = 0; + for (SimpleLoopReduction::iterator J = PossibleReds[i].begin(), + JE = PossibleReds[i].end(); J != JE; ++J) { + // Note that all instructions in the chain must have been found because + // all instructions in the function must have been assigned to some + // iteration. + int Iter = PossibleRedIter[*J]; + if (Iter != PrevIter && Iter != PrevIter + 1 && + !PossibleReds[i].getReducedValue()->isAssociative()) { + DEBUG(dbgs() << "LRR: Out-of-order non-associative reduction: " << + *J << "\n"); + return false; + } + + if (Iter != PrevIter) { + if (Count != BaseCount) { + DEBUG(dbgs() << "LRR: Iteration " << PrevIter << + " reduction use count " << Count << + " is not equal to the base use count " << + BaseCount << "\n"); + return false; + } + + Count = 0; + } + + ++Count; + if (Iter == 0) + ++BaseCount; + + PrevIter = Iter; + } + } + + return true; +} + +// For all selected reductions, remove all parts except those in the first +// iteration (and the PHI). Replace outside uses of the reduced value with uses +// of the first-iteration reduced value (in other words, reroll the selected +// reductions). +void LoopReroll::ReductionTracker::replaceSelected() { + // Fixup reductions to refer to the last instruction associated with the + // first iteration (not the last). + for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); + RI != RIE; ++RI) { + int i = *RI; + int j = 0; + for (int e = PossibleReds[i].size(); j != e; ++j) + if (PossibleRedIter[PossibleReds[i][j]] != 0) { + --j; + break; + } + + // Replace users with the new end-of-chain value. 
+ SmallInstructionVector Users; + for (Value::use_iterator UI = + PossibleReds[i].getReducedValue()->use_begin(), + UIE = PossibleReds[i].getReducedValue()->use_end(); UI != UIE; ++UI) + Users.push_back(cast(*UI)); + + for (SmallInstructionVector::iterator J = Users.begin(), + JE = Users.end(); J != JE; ++J) + (*J)->replaceUsesOfWith(PossibleReds[i].getReducedValue(), + PossibleReds[i][j]); + } +} + +// Reroll the provided loop with respect to the provided induction variable. +// Generally, we're looking for a loop like this: +// +// %iv = phi [ (preheader, ...), (body, %iv.next) ] +// f(%iv) +// %iv.1 = add %iv, 1 <-- a root increment +// f(%iv.1) +// %iv.2 = add %iv, 2 <-- a root increment +// f(%iv.2) +// %iv.scale_m_1 = add %iv, scale-1 <-- a root increment +// f(%iv.scale_m_1) +// ... +// %iv.next = add %iv, scale +// %cmp = icmp(%iv, ...) +// br %cmp, header, exit +// +// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of +// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can +// be intermixed with eachother. The restriction imposed by this algorithm is +// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1), +// etc. be the same. +// +// First, we collect the use set of %iv, excluding the other increment roots. +// This gives us f(%iv). Then we iterate over the loop instructions (scale-1) +// times, having collected the use set of f(%iv.(i+1)), during which we: +// - Ensure that the next unmatched instruction in f(%iv) is isomorphic to +// the next unmatched instruction in f(%iv.(i+1)). +// - Ensure that both matched instructions don't have any external users +// (with the exception of last-in-chain reduction instructions). +// - Track the (aliasing) write set, and other side effects, of all +// instructions that belong to future iterations that come before the matched +// instructions. If the matched instructions read from that write set, then +// f(%iv) or f(%iv.(i+1)) has some dependency on instructions in +// f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly, +// if any of these future instructions had side effects (could not be +// speculatively executed), and so do the matched instructions, when we +// cannot reorder those side-effect-producing instructions, and rerolling +// fails. +// +// Finally, we make sure that all loop instructions are either loop increment +// roots, belong to simple latch code, parts of validated reductions, part of +// f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions +// have been validated), then we reroll the loop. +bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, + const SCEV *IterCount, + ReductionTracker &Reductions) { + const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV)); + uint64_t Inc = cast(RealIVSCEV->getOperand(1))-> + getValue()->getZExtValue(); + // The collection of loop increment instructions. + SmallInstructionVector LoopIncs; + uint64_t Scale = Inc; + + // The effective induction variable, IV, is normally also the real induction + // variable. When we're dealing with a loop like: + // for (int i = 0; i < 500; ++i) + // x[3*i] = ...; + // x[3*i+1] = ...; + // x[3*i+2] = ...; + // then the real IV is still i, but the effective IV is (3*i). 
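+  // (Editorial illustration, not part of the original commit: in the example
+  // above, findScaleFromMul leaves RealIV as the phi %iv with unit step and
+  // redirects IV to the "%scaled.iv = mul %iv, 3" instruction, with Scale 3.)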
+ Instruction *RealIV = IV; + if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs)) + return false; + + assert(Scale <= MaxInc && "Scale is too large"); + assert(Scale > 1 && "Scale must be at least 2"); + + // The set of increment instructions for each increment value. + SmallVector Roots(Scale-1); + SmallInstructionSet AllRoots; + if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs)) + return false; + + DEBUG(dbgs() << "LRR: Found all root induction increments for: " << + *RealIV << "\n"); + + // An array of just the possible reductions for this scale factor. When we + // collect the set of all users of some root instructions, these reduction + // instructions are treated as 'final' (their uses are not considered). + // This is important because we don't want the root use set to search down + // the reduction chain. + SmallInstructionSet PossibleRedSet; + SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet; + Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet, + PossibleRedLastSet); + + // We now need to check for equivalence of the use graph of each root with + // that of the primary induction variable (excluding the roots). Our goal + // here is not to solve the full graph isomorphism problem, but rather to + // catch common cases without a lot of work. As a result, we will assume + // that the relative order of the instructions in each unrolled iteration + // is the same (although we will not make an assumption about how the + // different iterations are intermixed). Note that while the order must be + // the same, the instructions may not be in the same basic block. + SmallInstructionSet Exclude(AllRoots); + Exclude.insert(LoopIncs.begin(), LoopIncs.end()); + + DenseSet BaseUseSet; + collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet); + + DenseSet AllRootUses; + std::vector > RootUseSets(Scale-1); + + bool MatchFailed = false; + for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) { + DenseSet &RootUseSet = RootUseSets[i]; + collectInLoopUserSet(L, Roots[i], SmallInstructionSet(), + PossibleRedSet, RootUseSet); + + DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() << + " vs. iteration increment " << (i+1) << + " use set size: " << RootUseSet.size() << "\n"); + + if (BaseUseSet.size() != RootUseSet.size()) { + MatchFailed = true; + break; + } + + // In addition to regular aliasing information, we need to look for + // instructions from later (future) iterations that have side effects + // preventing us from reordering them past other instructions with side + // effects. + bool FutureSideEffects = false; + AliasSetTracker AST(*AA); + + // The map between instructions in f(%iv.(i+1)) and f(%iv). + DenseMap BaseMap; + + assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops"); + for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(), + JE = Header->end(); J1 != JE && !MatchFailed; ++J1) { + if (cast(J1) == RealIV) + continue; + if (cast(J1) == IV) + continue; + if (!BaseUseSet.count(J1)) + continue; + if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs. + continue; + + while (J2 != JE && (!RootUseSet.count(J2) || + std::find(Roots[i].begin(), Roots[i].end(), J2) != + Roots[i].end())) { + // As we iterate through the instructions, instructions that don't + // belong to previous iterations (or the base case), must belong to + // future iterations. We want to track the alias set of writes from + // previous iterations. 
+ if (!isa(J2) && !BaseUseSet.count(J2) && + !AllRootUses.count(J2)) { + if (J2->mayWriteToMemory()) + AST.add(J2); + + // Note: This is specifically guarded by a check on isa, + // which while a valid (somewhat arbitrary) micro-optimization, is + // needed because otherwise isSafeToSpeculativelyExecute returns + // false on PHI nodes. + if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL)) + FutureSideEffects = true; + } + + ++J2; + } + + if (!J1->isSameOperationAs(J2)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << "\n"); + MatchFailed = true; + break; + } + + // Make sure that this instruction, which is in the use set of this + // root instruction, does not also belong to the base set or the set of + // some previous root instruction. + if (BaseUseSet.count(J2) || AllRootUses.count(J2)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << " (prev. case overlap)\n"); + MatchFailed = true; + break; + } + + // Make sure that we don't alias with any instruction in the alias set + // tracker. If we do, then we depend on a future iteration, and we + // can't reroll. + if (J2->mayReadFromMemory()) { + for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end(); + K != KE && !MatchFailed; ++K) { + if (K->aliasesUnknownInst(J2, *AA)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << " (depends on future store)\n"); + MatchFailed = true; + break; + } + } + } + + // If we've past an instruction from a future iteration that may have + // side effects, and this instruction might also, then we can't reorder + // them, and this matching fails. As an exception, we allow the alias + // set tracker to handle regular (simple) load/store dependencies. + if (FutureSideEffects && + ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || + (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << + " (side effects prevent reordering)\n"); + MatchFailed = true; + break; + } + + // For instructions that are part of a reduction, if the operation is + // associative, then don't bother matching the operands (because we + // already know that the instructions are isomorphic, and the order + // within the iteration does not matter). For non-associative reductions, + // we do need to match the operands, because we need to reject + // out-of-order instructions within an iteration! + // For example (assume floating-point addition), we need to reject this: + // x += a[i]; x += b[i]; + // x += a[i+1]; x += b[i+1]; + // x += b[i+2]; x += a[i+2]; + bool InReduction = Reductions.isPairInSame(J1, J2); + + if (!(InReduction && J1->isAssociative())) { + bool Swapped = false, SomeOpMatched = false;; + for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { + Value *Op2 = J2->getOperand(j); + + // If this is part of a reduction (and the operation is not + // associatve), then we match all operands, but not those that are + // part of the reduction. + if (InReduction) + if (Instruction *Op2I = dyn_cast(Op2)) + if (Reductions.isPairInSame(J2, Op2I)) + continue; + + DenseMap::iterator BMI = BaseMap.find(Op2); + if (BMI != BaseMap.end()) + Op2 = BMI->second; + else if (std::find(Roots[i].begin(), Roots[i].end(), + (Instruction*) Op2) != Roots[i].end()) + Op2 = IV; + + if (J1->getOperand(Swapped ? 
unsigned(!j) : j) != Op2) { + // If we've not already decided to swap the matched operands, and + // we've not already matched our first operand (note that we could + // have skipped matching the first operand because it is part of a + // reduction above), and the instruction is commutative, then try + // the swapped match. + if (!Swapped && J1->isCommutative() && !SomeOpMatched && + J1->getOperand(!j) == Op2) { + Swapped = true; + } else { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << " (operand " << j << ")\n"); + MatchFailed = true; + break; + } + } + + SomeOpMatched = true; + } + } + + if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) || + (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << + " vs. " << *J2 << " (uses outside loop)\n"); + MatchFailed = true; + break; + } + + if (!MatchFailed) + BaseMap.insert(std::pair(J2, J1)); + + AllRootUses.insert(J2); + Reductions.recordPair(J1, J2, i+1); + + ++J2; + } + } + + if (MatchFailed) + return false; + + DEBUG(dbgs() << "LRR: Matched all iteration increments for " << + *RealIV << "\n"); + + DenseSet LoopIncUseSet; + collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(), + SmallInstructionSet(), LoopIncUseSet); + DEBUG(dbgs() << "LRR: Loop increment set size: " << + LoopIncUseSet.size() << "\n"); + + // Make sure that all instructions in the loop have been included in some + // use set. + for (BasicBlock::iterator J = Header->begin(), JE = Header->end(); + J != JE; ++J) { + if (isa(J)) + continue; + if (cast(J) == RealIV) + continue; + if (cast(J) == IV) + continue; + if (BaseUseSet.count(J) || AllRootUses.count(J) || + (LoopIncUseSet.count(J) && (J->isTerminator() || + isSafeToSpeculativelyExecute(J, DL)))) + continue; + + if (AllRoots.count(J)) + continue; + + if (Reductions.isSelectedPHI(J)) + continue; + + DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV << + " unprocessed instruction found: " << *J << "\n"); + MatchFailed = true; + break; + } + + if (MatchFailed) + return false; + + DEBUG(dbgs() << "LRR: all instructions processed from " << + *RealIV << "\n"); + + if (!Reductions.validateSelected()) + return false; + + // At this point, we've validated the rerolling, and we're committed to + // making changes! + + Reductions.replaceSelected(); + + // Remove instructions associated with non-base iterations. + for (BasicBlock::reverse_iterator J = Header->rbegin(); + J != Header->rend();) { + if (AllRootUses.count(&*J)) { + Instruction *D = &*J; + DEBUG(dbgs() << "LRR: removing: " << *D << "\n"); + D->eraseFromParent(); + continue; + } + + ++J; + } + + // Insert the new induction variable. + const SCEV *Start = RealIVSCEV->getStart(); + if (Inc == 1) + Start = SE->getMulExpr(Start, + SE->getConstant(Start->getType(), Scale)); + const SCEVAddRecExpr *H = + cast(SE->getAddRecExpr(Start, + SE->getConstant(RealIVSCEV->getType(), 1), + L, SCEV::FlagAnyWrap)); + { // Limit the lifetime of SCEVExpander. 
+ SCEVExpander Expander(*SE, "reroll"); + PHINode *NewIV = + cast(Expander.expandCodeFor(H, IV->getType(), + Header->begin())); + for (DenseSet::iterator J = BaseUseSet.begin(), + JE = BaseUseSet.end(); J != JE; ++J) + (*J)->replaceUsesOfWith(IV, NewIV); + + if (BranchInst *BI = dyn_cast(Header->getTerminator())) { + if (LoopIncUseSet.count(BI)) { + const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); + if (Inc == 1) + ICSCEV = + SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale)); + Value *IC; + if (isa(ICSCEV)) { + IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI); + } else { + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) + Preheader = InsertPreheaderForLoop(L, this); + + IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), + Preheader->getTerminator()); + } + + Value *NewIVNext = NewIV->getIncomingValueForBlock(Header); + Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC, + "exitcond"); + BI->setCondition(Cond); + + if (BI->getSuccessor(1) != Header) + BI->swapSuccessors(); + } + } + } + + SimplifyInstructionsInBlock(Header, DL, TLI); + DeleteDeadPHIs(Header, TLI); + ++NumRerolledLoops; + return true; +} + +bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { + AA = &getAnalysis(); + LI = &getAnalysis(); + SE = &getAnalysis(); + TLI = &getAnalysis(); + DL = getAnalysisIfAvailable(); + DT = &getAnalysis(); + + BasicBlock *Header = L->getHeader(); + DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << + "] Loop %" << Header->getName() << " (" << + L->getNumBlocks() << " block(s))\n"); + + bool Changed = false; + + // For now, we'll handle only single BB loops. + if (L->getNumBlocks() > 1) + return Changed; + + if (!SE->hasLoopInvariantBackedgeTakenCount(L)) + return Changed; + + const SCEV *LIBETC = SE->getBackedgeTakenCount(L); + const SCEV *IterCount = + SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1)); + DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); + + // First, we need to find the induction variable with respect to which we can + // reroll (there may be several possible options). + SmallInstructionVector PossibleIVs; + collectPossibleIVs(L, PossibleIVs); + + if (PossibleIVs.empty()) { + DEBUG(dbgs() << "LRR: No possible IVs found\n"); + return Changed; + } + + ReductionTracker Reductions; + collectPossibleReductions(L, Reductions); + + // For each possible IV, collect the associated possible set of 'root' nodes + // (i+1, i+2, etc.). 
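To make the "root" terminology concrete, here is a minimal sketch in plain C++ (illustrative only, not code from this patch): with a scale of 3, the base instruction uses the IV i and the roots are the copies using i+1 and i+2; rerolling collapses the three copies into one body with a unit-stride IV.

void unrolledByThree(int *a, int x, int n) {
  for (int i = 0; i < 3 * n; i += 3) {
    a[i] += x;     // base instruction: uses the IV directly
    a[i + 1] += x; // root instruction for i+1
    a[i + 2] += x; // root instruction for i+2
  }
}

void rerolled(int *a, int x, int n) {
  for (int i = 0; i < 3 * n; ++i) // single copy of the body, unit-stride IV
    a[i] += x;
}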
+ for (SmallInstructionVector::iterator I = PossibleIVs.begin(), + IE = PossibleIVs.end(); I != IE; ++I) + if (reroll(*I, L, Header, IterCount, Reductions)) { + Changed = true; + break; + } + + return Changed; +} + diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 72e00e1..857597e 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopInstSimplifyPass(Registry); initializeLoopRotatePass(Registry); initializeLoopStrengthReducePass(Registry); + initializeLoopRerollPass(Registry); initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); initializeLoopIdiomRecognizePass(Registry); @@ -112,6 +113,10 @@ void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } +void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopRerollPass()); +} + void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopUnrollPass()); } -- cgit v1.1 From b7dabccbce5fc6fcf7b36669eb04abcb001e7f9e Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 17 Nov 2013 01:21:54 +0000 Subject: Fix ndebug-build unused variable in loop rerolling git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194941 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopRerollPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index eb39cd0..335af81 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -276,7 +276,7 @@ protected: int Idx = PossibleRedIdx[J1]; assert(Idx == PossibleRedIdx[J2] && "Recording pair from different reductions?"); - Reds.insert(PossibleRedIdx[J1]); + Reds.insert(Idx); } } -- cgit v1.1 From 390564206f67b742ad7cbee1cf17ae52efa77cb6 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 17 Nov 2013 02:06:35 +0000 Subject: Add the cold attribute to error-reporting call sites Generally speaking, control flow paths with error reporting calls are cold. So far, error reporting calls are calls to perror and calls to fprintf, fwrite, etc. with stderr as the stream. This can be extended in the future. The primary motivation is to improve block placement (the cold attribute affects the static branch prediction heuristics). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194943 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyLibCalls.cpp | 72 +++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index a060c34..15b3e66 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -27,11 +27,16 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; +static cl::opt +ColdErrorCalls("error-reporting-is-cold", cl::init(true), + cl::Hidden, cl::desc("Treat error-reporting calls as cold")); + /// This class is the abstract base class for the set of optimizations that /// corresponds to one library call. 
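As a standalone illustration of the r194943 heuristic above (hypothetical user code, not part of the patch): the error path below is expected to be rarely taken, so marking the error-reporting call cold steers static branch prediction, and therefore block placement, toward the success path.

#include <cstddef>
#include <cstdio>

int readBlock(FILE *F, char *Buf, size_t N) {
  if (fread(Buf, 1, N, F) != N) {
    fprintf(stderr, "read failed\n"); // error-reporting call on stderr:
    return -1;                        // the whole branch is treated as cold
  }
  return 0; // expected hot path
}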
namespace { @@ -1506,6 +1511,54 @@ struct ToAsciiOpt : public LibCallOptimization { // Formatting and IO Library Call Optimizations //===----------------------------------------------------------------------===// +struct ErrorReportingOpt : public LibCallOptimization { + ErrorReportingOpt(int S = -1) : StreamArg(S) {} + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &) { + // Error reporting calls should be cold, mark them as such. + // This applies even to non-builtin calls: it is only a hint and applies to + // functions that the frontend might not understand as builtins. + + // This heuristic was suggested in: + // Improving Static Branch Prediction in a Compiler + // Brian L. Deitrich, Ben-Chung Cheng, Wen-mei W. Hwu + // Proceedings of PACT'98, Oct. 1998, IEEE + + if (!CI->hasFnAttr(Attribute::Cold) && isReportingError(Callee, CI)) { + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); + } + + return 0; + } + +protected: + bool isReportingError(Function *Callee, CallInst *CI) { + if (!ColdErrorCalls) + return false; + + if (!Callee || !Callee->isDeclaration()) + return false; + + if (StreamArg < 0) + return true; + + // These functions might be considered cold, but only if their stream + // argument is stderr. + + if (StreamArg >= (int) CI->getNumArgOperands()) + return false; + LoadInst *LI = dyn_cast(CI->getArgOperand(StreamArg)); + if (!LI) + return false; + GlobalVariable *GV = dyn_cast(LI->getPointerOperand()); + if (!GV || !GV->isDeclaration()) + return false; + return GV->getName() == "stderr"; + } + + int StreamArg; +}; + struct PrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -1686,6 +1739,9 @@ struct SPrintFOpt : public LibCallOptimization { struct FPrintFOpt : public LibCallOptimization { Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 0); + (void) ER.callOptimizer(Callee, CI, B); + // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1763,6 +1819,9 @@ struct FPrintFOpt : public LibCallOptimization { struct FWriteOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 3); + (void) ER.callOptimizer(Callee, CI, B); + // Require a pointer, an integer, an integer, a pointer, returning integer. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || @@ -1796,6 +1855,9 @@ struct FWriteOpt : public LibCallOptimization { struct FPutsOpt : public LibCallOptimization { virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + ErrorReportingOpt ER(/* StreamArg = */ 1); + (void) ER.callOptimizer(Callee, CI, B); + // These optimizations require DataLayout. if (!TD) return 0; @@ -1924,6 +1986,9 @@ static IsAsciiOpt IsAscii; static ToAsciiOpt ToAscii; // Formatting and IO library call optimizations. 
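A short sketch of which call sites the StreamArg check above accepts (hypothetical user code, not from the patch): only calls whose stream operand is a load of the global named "stderr" qualify, while perror qualifies unconditionally because it takes no stream argument.

#include <cstdio>

void report(FILE *Log, const char *Msg) {
  fputs(Msg, stderr); // stream operand is the 'stderr' global: marked cold
  fputs(Msg, Log);    // stream is not provably stderr: left untouched
  perror(Msg);        // no stream argument (StreamArg < 0): marked cold
}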
+static ErrorReportingOpt ErrorReporting; +static ErrorReportingOpt ErrorReporting0(0); +static ErrorReportingOpt ErrorReporting1(1); static PrintFOpt PrintF; static SPrintFOpt SPrintF; static FPrintFOpt FPrintF; @@ -2038,6 +2103,13 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &FPuts; case LibFunc::puts: return &Puts; + case LibFunc::perror: + return &ErrorReporting; + case LibFunc::vfprintf: + case LibFunc::fiprintf: + return &ErrorReporting0; + case LibFunc::fputc: + return &ErrorReporting1; case LibFunc::ceil: case LibFunc::fabs: case LibFunc::floor: -- cgit v1.1 From c8dc96be28fd1a3d6ddebbb48b8d55b61e4bd89b Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sun, 17 Nov 2013 16:02:50 +0000 Subject: Add a loop rerolling flag to the PassManagerBuilder This adds a boolean member variable to the PassManagerBuilder to control loop rerolling (just like we have for unrolling and the various vectorization options). This is necessary for control by the frontend. Loop rerolling remains disabled by default at all optimization levels. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194966 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/PassManagerBuilder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 5399e68..24c5018 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -69,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() { SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; LateVectorize = LateVectorization; + RerollLoops = RunLoopRerolling; } PassManagerBuilder::~PassManagerBuilder() { @@ -220,7 +221,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (RunLoopRerolling) + if (RerollLoops) MPM.add(createLoopRerollPass()); if (SLPVectorize) MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. -- cgit v1.1 From 80ccd9ea59b8911f12836da98aceedce4ebc6a6f Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sun, 17 Nov 2013 18:05:34 +0000 Subject: Utils/LoopUnroll.cpp: Tweak (StringRef)OldName to be valid until it is used, since r194601. eraseFromParent() invalidates OldName. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194970 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/LoopUnroll.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 9955bfd..162807d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -90,6 +90,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // Move all definitions in the successor to the predecessor... OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList()); + // OldName will be valid until erased. StringRef OldName = BB->getName(); // Erase basic block from the function... @@ -102,12 +103,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, } } LI->removeBlock(BB); - BB->eraseFromParent(); // Inherit predecessor's name if it exists... 
if (!OldName.empty() && !OnlyPred->hasName()) OnlyPred->setName(OldName); + BB->eraseFromParent(); + return OnlyPred; } -- cgit v1.1 From 07a3c481c656c9cc1e0ace3d599eef1fa81e3cc6 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Mon, 18 Nov 2013 13:14:32 +0000 Subject: LoopVectorizer: Extend the induction variable to a larger type In some case the loop exit count computation can overflow. Extend the type to prevent most of those cases. The problem is loops like: int main () { int a = 1; char b = 0; lbl: a &= 4; b--; if (b) goto lbl; return a; } The backedge count is 255. The induction variable type is i8. If we add one to 255 to get the exit count we overflow to zero. To work around this issue we extend the type of the induction variable to i32 in the case of i8 and i16. PR17532 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195008 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e624bb4..79f80f3 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1537,6 +1537,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop); assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); // Get the total trip count from the count by adding 1. ExitCount = SE->getAddExpr(ExitCount, SE->getConstant(ExitCount->getType(), 1)); @@ -2888,6 +2889,11 @@ static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) return DL.getIntPtrType(Ty); + // It is possible that char's or short's overflow when we ask for the loop's + // trip count, work around this by changing the type size. + if (Ty->getScalarSizeInBits() < 32) + return Type::getInt32Ty(Ty->getContext()); + return Ty; } -- cgit v1.1 From 64409ad8e3b360b84349042f14b57f87a5c0ca18 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Mon, 18 Nov 2013 14:53:55 +0000 Subject: [ASan] Fix PR17867 - make sure ASan doesn't crash if use-after-scope and use-after-return are combined. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Instrumentation/AddressSanitizer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index d8c3f8e..d731ec5 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -426,6 +426,7 @@ struct FunctionStackPoisoner : public InstVisitor { // Stores a place and arguments of poisoning/unpoisoning call for alloca. 
struct AllocaPoisonCall {
IntrinsicInst *InsBefore;
+ AllocaInst *AI;
uint64_t Size;
bool DoPoison;
};
@@ -504,7 +505,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AllocaInst *AI = findAllocaForValue(II.getArgOperand(1));
if (!AI) return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
- AllocaPoisonCall APC = {&II, SizeValue, DoPoison};
+ AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
AllocaPoisonCallVec.push_back(APC);
}
@@ -1523,11 +1524,10 @@ void FunctionStackPoisoner::poisonStack() {
bool HavePoisonedAllocas = false;
for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
- IntrinsicInst *II = APC.InsBefore;
- AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
- assert(AI);
- IRBuilder<> IRB(II);
- poisonAlloca(AI, APC.Size, IRB, APC.DoPoison);
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ IRBuilder<> IRB(APC.InsBefore);
+ poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
HavePoisonedAllocas |= APC.DoPoison;
}

-- cgit v1.1 


From 940267e7f208751fdc48dbb7d6b5d86b6310ce7c Mon Sep 17 00:00:00 2001
From: Adrian Prantl
Date: Mon, 18 Nov 2013 23:04:38 +0000
Subject: Debug info: Let LowerDbgDeclare perform the dbg.declare -> dbg.value lowering only for load/stores to scalar allocas.

The resulting values confuse the backend and don't add anything because we
can describe array-allocas with a dbg.declare intrinsic just fine.

rdar://problem/15464571

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195052 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Utils/Local.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 82b8da3..2768041 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -1045,7 +1045,11 @@ bool llvm::LowerDbgDeclare(Function &F) {
for (SmallVectorImpl<DbgDeclareInst *>::iterator I = Dbgs.begin(),
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ // If this is an alloca for a scalar variable, insert a dbg.value
+ // at each load and store to the alloca and erase the dbg.declare.
+ if (AI && !AI->isArrayAllocation()) {
+
// We only remove the dbg.declare intrinsic if all uses are
// converted to dbg.value intrinsics.
bool RemoveDDI = true;

-- cgit v1.1 


From 43f41cc550e74346f927dc45cb82c57ddcf07117 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 20 Nov 2013 06:15:56 +0000
Subject: Merging r195118:

------------------------------------------------------------------------
r195118 | chandlerc | 2013-11-19 01:03:18 -0800 (Tue, 19 Nov 2013) | 22 lines

Fix an issue where SROA computed different results based on the relative
order of slices of the alloca which have exactly the same size and other
properties. This was found by a perniciously unstable sort
implementation used to flush out buggy uses of the algorithm.

The fundamental idea is that findCommonType should return the best
common type it can find across all of the slices in the range. There
were two bugs here previously:

1) We would accept an integer type smaller than a byte-width multiple,
and if there were different bit-width integer types, we would accept
the first one. This caused an actual failure in the testcase updated
here when the sort order changed.
2) If we found a bad combination of types or a non-load, non-store use
before an integer typed load or store we would bail, but if we found the
integer typed load or store, we would use it. The correct behavior is to
always use an integer typed operation which covers the partition if one
exists.

While a clever debugging sort algorithm found problem #1 in our existing
test cases, I have no useful test case ideas for #2. I spotted it by
inspection when looking at this code.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195217 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Scalar/SROA.cpp | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index a0be2c6..9f3fc83 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -938,6 +938,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = 0;
+ bool IgnoreNonIntegralTypes = false;
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
@@ -946,29 +947,37 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
continue;

Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
- else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
- else
- return 0; // Bail if we have weird uses.
+ } else {
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
+ continue;
+ }

if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
- // entity causing the split.
- if (ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
+ // entity causing the split. Also skip if the type is not a byte width
+ // multiple.
+ if (ITy->getBitWidth() % 8 != 0 ||
+ ITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;

// If we have found an integer type use covering the alloca, use that
- // regardless of the other types, as integers are often used for a
- // "bucket
- // of bits" type.
+ // regardless of the other types, as integers are often used for
+ // a "bucket of bits" type.
+ //
+ // NB: This *must* be the only return from inside the loop so that the
+ // order of slices doesn't impact the computed type.
return ITy;
+ } else if (IgnoreNonIntegralTypes) {
+ continue;
}

if (Ty && Ty != UserTy)
- return 0;
+ IgnoreNonIntegralTypes = true; // Give up on anything but an iN type.
Ty = UserTy; } -- cgit v1.1 From 8e5b91849a39173b1ce1c15e0e279b94562204b5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Nov 2013 06:27:56 +0000 Subject: Merging r195161: ------------------------------------------------------------------------ r195161 | arnolds | 2013-11-19 14:20:18 -0800 (Tue, 19 Nov 2013) | 1 line SLPVectorizer: Fix whitespace errors ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195221 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5c185f1..0c962d6 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1869,7 +1869,7 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef Chain, } } - return Changed; + return Changed; } bool SLPVectorizer::vectorizeStores(ArrayRef Stores, @@ -1974,7 +1974,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { return false; unsigned Opcode0 = I0->getOpcode(); - + Type *Ty0 = I0->getType(); unsigned Sz = DL->getTypeSizeInBits(Ty0); unsigned VF = MinVecRegSize / Sz; @@ -1989,11 +1989,11 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { } bool Changed = false; - + for (unsigned i = 0, e = VL.size(); i < e; ++i) { unsigned OpsWidth = 0; - - if (i + VF > e) + + if (i + VF > e) OpsWidth = e - i; else OpsWidth = VF; @@ -2001,23 +2001,24 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef VL, BoUpSLP &R) { if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; - DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); + DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " + << "\n"); ArrayRef Ops = VL.slice(i, OpsWidth); - + R.buildTree(Ops); int Cost = R.getTreeCost(); - + if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing pair at cost:" << Cost << ".\n"); R.vectorizeTree(); - + // Move to the next bundle. i += VF - 1; Changed = true; } } - - return Changed; + + return Changed; } bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) { -- cgit v1.1 From dd9c5e98c87a33eae1fe0ec9e03bc41f6f3a731d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 20 Nov 2013 06:28:12 +0000 Subject: Merging r195162: ------------------------------------------------------------------------ r195162 | arnolds | 2013-11-19 14:20:20 -0800 (Tue, 19 Nov 2013) | 12 lines SLPVectorizer: Fix stale for Value pointer array We are slicing an array of Value pointers and process those slices in a loop. The problem is that we might invalidate a later slice by vectorizing a former slice. Use a WeakVH to track the pointer. If the pointer is deleted or RAUW'ed we can tell. The test case will only fail when running with libgmalloc. 
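The WeakVH technique this message describes can be sketched in isolation (a minimal sketch assuming LLVM 3.4-era headers, not the patch itself): a WeakVH nulls itself when its value is deleted and follows RAUW to the replacement value, so comparing the snapshot against the raw pointers detects both kinds of staleness.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;

// True if any value in [SliceBegin, SliceBegin + SliceSize) of VL has been
// deleted or RAUW'ed since the WeakVH snapshot in VH was taken.
static bool sliceIsStale(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
                         unsigned SliceBegin, unsigned SliceSize) {
  for (unsigned i = SliceBegin; i != SliceBegin + SliceSize; ++i)
    if (VH[i] != VL[i]) // null after deletion, new value after RAUW
      return true;
  return false;
}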
radar://15498655
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195222 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Vectorize/SLPVectorizer.cpp | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0c962d6..2b498a8 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1833,6 +1833,21 @@ private:
StoreListMap StoreRefs;
};

+/// \brief Check that the Values in the slice of the VL array are still
+/// existent in the WeakVH array.
+/// Vectorization of part of the VL array may cause later values in the VL
+/// array to become invalid. We track when this has happened in the WeakVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
+ SmallVectorImpl<WeakVH> &VH,
+ unsigned SliceBegin,
+ unsigned SliceSize) {
+ for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
+ if (VH[i] != VL[i])
+ return true;
+
+ return false;
+}
+
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
int CostThreshold, BoUpSLP &R) {
unsigned ChainLen = Chain.size();
@@ -1845,11 +1860,19 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
if (!isPowerOf2_32(Sz) || VF < 2)
return false;

+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+
bool Changed = false;
// Look for profitable vectorizable trees at all offsets, starting at zero.
for (unsigned i = 0, e = ChainLen; i < e; ++i) {
if (i + VF > e)
break;
+
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(Chain, TrackValues, i, VF))
+ continue;
+
DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
<< "\n");
ArrayRef<Value *> Operands = Chain.slice(i, VF);

@@ -1990,6 +2013,9 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {

bool Changed = false;

+ // Keep track of values that were deleted by vectorizing in the loop below.
+ SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
unsigned OpsWidth = 0;

@@ -2001,6 +2027,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
break;

+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+ continue;
+
DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
<< "\n");
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);

-- cgit v1.1 


From 3343ddf466b414f811048dc9f3be2d55ffbb9658 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Mon, 25 Nov 2013 05:20:58 +0000
Subject: Merging r195477:

------------------------------------------------------------------------
r195477 | rafael | 2013-11-22 09:58:12 -0800 (Fri, 22 Nov 2013) | 13 lines

Add a fixed version of r195470 back.

The fix is simply to use CurI instead of I when handling aliases to avoid
accessing an invalid iterator.

original message:

Convert linkonce* to weak* instead of strong.

Also refactor the logic into a helper function. This is an important
improvement on mingw, where the linker complains about mixed weak and
strong symbols. Converting to weak ensures that the symbol is not
dropped, but keeps it in a comdat, making the linker happy.
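The linkage mapping described above, condensed into a standalone helper (a sketch of the same rule against the public GlobalValue API, with a hypothetical helper name; not the patch itself):

#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// Map linkonce* to the matching weak* linkage so the linker keeps the
// symbol (still in a comdat) instead of potentially dropping it.
static GlobalValue::LinkageTypes keepableLinkage(GlobalValue::LinkageTypes L) {
  switch (L) {
  case GlobalValue::LinkOnceAnyLinkage: return GlobalValue::WeakAnyLinkage;
  case GlobalValue::LinkOnceODRLinkage: return GlobalValue::WeakODRLinkage;
  default:                              return L; // everything else unchanged
  }
}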
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195603 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/ExtractGV.cpp | 54 +++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index fa3d72d..50fb3e6 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -21,6 +21,38 @@ #include using namespace llvm; +/// Make sure GV is visible from both modules. Delete is true if it is +/// being deleted from this module. +/// This also makes sure GV cannot be dropped so that references from +/// the split module remain valid. +static void makeVisible(GlobalValue &GV, bool Delete) { + bool Local = GV.hasLocalLinkage(); + if (Local) + GV.setVisibility(GlobalValue::HiddenVisibility); + + if (Local || Delete) { + GV.setLinkage(GlobalValue::ExternalLinkage); + return; + } + + if (!GV.hasLinkOnceLinkage()) { + assert(!GV.isDiscardableIfUnused()); + return; + } + + // Map linkonce* to weak* so that llvm doesn't drop this GV. + switch(GV.getLinkage()) { + default: + llvm_unreachable("Unexpected linkage"); + case GlobalValue::LinkOnceAnyLinkage: + GV.setLinkage(GlobalValue::WeakAnyLinkage); + return; + case GlobalValue::LinkOnceODRLinkage: + GV.setLinkage(GlobalValue::WeakODRLinkage); + return; + } +} + namespace { /// @brief A pass to extract specific functions and their dependencies. class GVExtractorPass : public ModulePass { @@ -60,12 +92,7 @@ namespace { continue; } - bool Local = I->isDiscardableIfUnused(); - if (Local) - I->setVisibility(GlobalValue::HiddenVisibility); - - if (Local || Delete) - I->setLinkage(GlobalValue::ExternalLinkage); + makeVisible(*I, Delete); if (Delete) I->setInitializer(0); @@ -80,12 +107,7 @@ namespace { continue; } - bool Local = I->isDiscardableIfUnused(); - if (Local) - I->setVisibility(GlobalValue::HiddenVisibility); - - if (Local || Delete) - I->setLinkage(GlobalValue::ExternalLinkage); + makeVisible(*I, Delete); if (Delete) I->deleteBody(); @@ -97,12 +119,10 @@ namespace { Module::alias_iterator CurI = I; ++I; - if (CurI->isDiscardableIfUnused()) { - CurI->setVisibility(GlobalValue::HiddenVisibility); - CurI->setLinkage(GlobalValue::ExternalLinkage); - } + bool Delete = deleteStuff == (bool)Named.count(CurI); + makeVisible(*CurI, Delete); - if (deleteStuff == (bool)Named.count(CurI)) { + if (Delete) { Type *Ty = CurI->getType()->getElementType(); CurI->removeFromParent(); -- cgit v1.1 From 215aad562cbff81f5b1ce5b570076b88a87998f8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 25 Nov 2013 05:22:53 +0000 Subject: Merging r195492: ------------------------------------------------------------------------ r195492 | arsenm | 2013-11-22 11:24:37 -0800 (Fri, 22 Nov 2013) | 1 line StructurizeCFG: Fix inverting a branch on an argument ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/StructurizeCFG.cpp | 33 +++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 72fea80..0124dfd 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ 
b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -323,21 +323,32 @@ Value *StructurizeCFG::invert(Value *Condition) { if (match(Condition, m_Not(m_Value(Condition)))) return Condition; - // Third: Check all the users for an invert - BasicBlock *Parent = cast(Condition)->getParent(); - for (Value::use_iterator I = Condition->use_begin(), - E = Condition->use_end(); I != E; ++I) { + if (Instruction *Inst = dyn_cast(Condition)) { + // Third: Check all the users for an invert + BasicBlock *Parent = Inst->getParent(); + for (Value::use_iterator I = Condition->use_begin(), + E = Condition->use_end(); I != E; ++I) { + + Instruction *User = dyn_cast(*I); + if (!User || User->getParent() != Parent) + continue; - Instruction *User = dyn_cast(*I); - if (!User || User->getParent() != Parent) - continue; + if (match(*I, m_Not(m_Specific(Condition)))) + return *I; + } + + // Last option: Create a new instruction + return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); + } - if (match(*I, m_Not(m_Specific(Condition)))) - return *I; + if (Argument *Arg = dyn_cast(Condition)) { + BasicBlock &EntryBlock = Arg->getParent()->getEntryBlock(); + return BinaryOperator::CreateNot(Condition, + Arg->getName() + ".inv", + EntryBlock.getTerminator()); } - // Last option: Create a new instruction - return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); + llvm_unreachable("Unhandled condition to invert"); } /// \brief Build the condition for one edge -- cgit v1.1 From e96466ecc0b2bdee0bed2156e12dc16f4adb2d50 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 25 Nov 2013 05:23:10 +0000 Subject: Merging r195493: ------------------------------------------------------------------------ r195493 | arsenm | 2013-11-22 11:24:39 -0800 (Fri, 22 Nov 2013) | 6 lines StructurizeCFG: Fix verification failure with some loops. If the beginning of the loop was also the entry block of the function, branches were inserted to the entry block which isn't allowed. If this occurs, create a new dummy function entry block that branches to the start of the loop. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195606 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/StructurizeCFG.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 0124dfd..5045ff8 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -779,6 +779,20 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed, handleLoops(false, LoopEnd); } + // If the start of the loop is the entry block, we can't branch to it so + // insert a new dummy entry block. 
+ Function *LoopFunc = LoopStart->getParent(); + if (LoopStart == &LoopFunc->getEntryBlock()) { + LoopStart->setName("entry.orig"); + + BasicBlock *NewEntry = + BasicBlock::Create(LoopStart->getContext(), + "entry", + LoopFunc, + LoopStart); + BranchInst::Create(LoopStart, NewEntry); + } + // Create an extra loop end node LoopEnd = needPrefix(false); BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); -- cgit v1.1 From fc1f9531d3f9bf14b4b20b80f158317795d3d1d8 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 25 Nov 2013 05:27:31 +0000 Subject: Merging r195528: ------------------------------------------------------------------------ r195528 | chandlerc | 2013-11-22 16:48:34 -0800 (Fri, 22 Nov 2013) | 7 lines Migrate metadata information from scalar to vector instructions during SLP vectorization. Based on the code in BBVectorizer. Fixes PR17741. Patch by Raul Silvera, reviewed by Hal and Nadav. Reformatted by my driving of clang-format. =] ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195608 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 39 ++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2b498a8..f29efad 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -163,6 +163,37 @@ static unsigned getSameOpcode(ArrayRef VL) { return Opcode; } +/// \returns \p I after propagating metadata from \p VL. +static Instruction *propagateMetadata(Instruction *I, ArrayRef VL) { + Instruction *I0 = cast(VL[0]); + SmallVector, 4> Metadata; + I0->getAllMetadataOtherThanDebugLoc(Metadata); + + for (unsigned i = 0, n = Metadata.size(); i != n; ++i) { + unsigned Kind = Metadata[i].first; + MDNode *MD = Metadata[i].second; + + for (int i = 1, e = VL.size(); MD && i != e; i++) { + Instruction *I = cast(VL[i]); + MDNode *IMD = I->getMetadata(Kind); + + switch (Kind) { + default: + MD = 0; // Remove unknown metadata + break; + case LLVMContext::MD_tbaa: + MD = MDNode::getMostGenericTBAA(MD, IMD); + break; + case LLVMContext::MD_fpmath: + MD = MDNode::getMostGenericFPMath(MD, IMD); + break; + } + } + I->setMetadata(Kind, MD); + } + return I; +} + /// \returns The type that all of the values in \p VL have or null if there /// are different types. 
static Type* getSameType(ArrayRef<Value *> VL) {
@@ -1478,6 +1509,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
return V;
}
case Instruction::Load: {
@@ -1494,7 +1529,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
LI = Builder.CreateLoad(VecPtr);
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
- return LI;
+ return propagateMetadata(LI, E->Scalars);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(VL0);
@@ -1513,7 +1548,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
S->setAlignment(Alignment);
E->VectorizedValue = S;
- return S;
+ return propagateMetadata(S, E->Scalars);
}
default:
llvm_unreachable("unknown inst");

-- cgit v1.1 


From fcb80cc5656e4672702e3150bfe425f4a58b7a65 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 27 Nov 2013 04:52:57 +0000
Subject: Merging r195769:

------------------------------------------------------------------------
r195769 | dyatkovskiy | 2013-11-26 08:11:03 -0800 (Tue, 26 Nov 2013) | 27 lines

PR17925 bugfix.

Short description: this issue concerns treating pointers as integers.
We treat pointers as different if they reference different address
spaces. At the same time, we treat pointers as equal to integers of
machine address width. This was a source of false positives. Consider
the following case on a 32-bit machine:

void foo0(i32 addrspace(1)* %p)
void foo1(i32 addrspace(2)* %p)
void foo2(i32 %p)

foo0 != foo1, while foo1 == foo2 and foo0 == foo2.

As you can see, this breaks transitivity, which means that the result
depends on the order in which the functions appear in the module. The
following order causes foo0 and foo1 to be merged: foo2, foo0, foo1.
First foo0 is merged with foo2 and foo0 is erased; then foo1 is merged
with foo2. So, depending on the order, things we do not expect to be
merged can be merged.

The fix: never treat a pointer as an integer, except for pointers in
address space 0.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195810 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/IPO/MergeFunctions.cpp | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b8397d6..3861421 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -210,19 +210,20 @@ private:

// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
+
+ PointerType *PTy1 = dyn_cast<PointerType>(Ty1);
+ PointerType *PTy2 = dyn_cast<PointerType>(Ty2);
+
+ if (TD) {
+ if (PTy1 && PTy1->getAddressSpace() == 0) Ty1 = TD->getIntPtrType(Ty1);
+ if (PTy2 && PTy2->getAddressSpace() == 0) Ty2 = TD->getIntPtrType(Ty2);
+ }
+
if (Ty1 == Ty2)
return true;

- if (Ty1->getTypeID() != Ty2->getTypeID()) {
- if (TD) {
-
- if (isa<PointerType>(Ty1) && Ty2 == TD->getIntPtrType(Ty1))
- return true;
- if (isa<PointerType>(Ty2) && Ty1 == TD->getIntPtrType(Ty2))
- return true;
- }
+ if (Ty1->getTypeID() != Ty2->getTypeID())
return false;
- }

switch (Ty1->getTypeID()) {
default:
@@ -244,8 +245,7 @@ bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
return true;

case Type::PointerTyID: {
- PointerType *PTy1 = cast<PointerType>(Ty1);
- PointerType *PTy2 = cast<PointerType>(Ty2);
+ assert(PTy1 && PTy2 && "Both types must be pointers here.");
return PTy1->getAddressSpace() == PTy2->getAddressSpace();
}

-- cgit v1.1 


From 3209153cc9728358211b7305305b83cdd0ad1435 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 27 Nov 2013 05:34:53 +0000
Subject: Merging r195773:

------------------------------------------------------------------------
r195773 | nadav | 2013-11-26 09:29:19 -0800 (Tue, 26 Nov 2013) | 6 lines

PR18060 - When we RAUW values with ExtractElement instructions, in some
cases we generate PHI nodes with multiple entries from the same basic
block but with different values. Enabling CSE on ExtractElement
instructions makes sure that all of the RAUWed instructions are the same.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195817 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/Vectorize/SLPVectorizer.cpp | 8 ++++++++
1 file changed, 8 insertions(+)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f29efad..699be95 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1588,6 +1588,8 @@ Value *BoUpSLP::vectorizeTree() {
if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (Instruction *Ins = dyn_cast<Instruction>(Ex))
+ GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex);
} else if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) {
@@ -1595,17 +1597,23 @@ Value *BoUpSLP::vectorizeTree() {
if (PH->getIncomingValue(i) == Scalar) {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (Instruction *Ins = dyn_cast<Instruction>(Ex))
+ GatherSeq.insert(Ins);
PH->setOperand(i, Ex);
}
}
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (Instruction *Ins = dyn_cast<Instruction>(Ex))
+ GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(F->getEntryBlock().begin());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ if (Instruction *Ins = dyn_cast<Instruction>(Ex))
+ GatherSeq.insert(Ins);
User->replaceUsesOfWith(Scalar, Ex);
}

-- cgit v1.1 


From a0d44fe4cd92c11466b82af4f5089af845a2eeb5 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Wed, 27 Nov 2013 05:35:16 +0000
Subject: Merging r195791:

------------------------------------------------------------------------
r195791 | nadav | 2013-11-26 14:24:25 -0800 (Tue, 26 Nov 2013) | 4 lines

PR18060 - We can't save a list of ExtractElement
instructions to CSE because some of these instructions may be removed and optimized in future iterations. Instead we save a list of basic blocks that we need to CSE. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195818 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 699be95..9f18596 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -520,6 +520,8 @@ private: /// Holds all of the instructions that we gathered. SetVector GatherSeq; + /// A list of blocks that we are going to CSE. + SmallSet CSEBlocks; /// Numbers instructions in different blocks. DenseMap BlocksNumbers; @@ -1274,6 +1276,7 @@ Value *BoUpSLP::Gather(ArrayRef VL, VectorType *Ty) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); if (Instruction *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); + CSEBlocks.insert(Insrt->getParent()); // Add to our 'need-to-extract' list. if (ScalarToTreeEntry.count(VL[i])) { @@ -1588,8 +1591,7 @@ Value *BoUpSLP::vectorizeTree() { if (PHINode *PN = dyn_cast(Vec)) { Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(PN->getParent()); User->replaceUsesOfWith(Scalar, Ex); } else if (isa(Vec)){ if (PHINode *PH = dyn_cast(User)) { @@ -1597,23 +1599,20 @@ Value *BoUpSLP::vectorizeTree() { if (PH->getIncomingValue(i) == Scalar) { Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(PH->getIncomingBlock(i)); PH->setOperand(i, Ex); } } } else { Builder.SetInsertPoint(cast(User)); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(cast(User)->getParent()); User->replaceUsesOfWith(Scalar, Ex); } } else { Builder.SetInsertPoint(F->getEntryBlock().begin()); Value *Ex = Builder.CreateExtractElement(Vec, Lane); - if (Instruction *Ins = dyn_cast(Ex)) - GatherSeq.insert(Ins); + CSEBlocks.insert(&F->getEntryBlock()); User->replaceUsesOfWith(Scalar, Ex); } @@ -1676,9 +1675,6 @@ public: void BoUpSLP::optimizeGatherSequence() { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); - // Keep a list of visited BBs to run CSE on. It is typically small. - SmallPtrSet VisitedBBs; - SmallVector CSEWorkList; // LICM InsertElementInst sequences. for (SetVector::iterator it = GatherSeq.begin(), e = GatherSeq.end(); it != e; ++it) { @@ -1687,9 +1683,6 @@ void BoUpSLP::optimizeGatherSequence() { if (!Insert) continue; - if (VisitedBBs.insert(Insert->getParent())) - CSEWorkList.push_back(Insert->getParent()); - // Check if this block is inside a loop. Loop *L = LI->getLoopFor(Insert->getParent()); if (!L) @@ -1716,6 +1709,7 @@ void BoUpSLP::optimizeGatherSequence() { // Sort blocks by domination. This ensures we visit a block after all blocks // dominating it are visited. 
+ SmallVector CSEWorkList(CSEBlocks.begin(), CSEBlocks.end()); std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), DTCmp(DT)); // Perform O(N^2) search over the gather sequences and merge identical @@ -1731,8 +1725,7 @@ void BoUpSLP::optimizeGatherSequence() { // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = it++; - if ((!isa(In) && !isa(In)) || - !GatherSeq.count(In)) + if (!isa(In) && !isa(In)) continue; // Check if we can replace this instruction with any of the @@ -1754,6 +1747,8 @@ void BoUpSLP::optimizeGatherSequence() { } } } + CSEBlocks.clear(); + GatherSeq.clear(); } /// The SLPVectorizer Pass. -- cgit v1.1 From 3f297541c5440c4758b34214fdbbf9ae5414c0f1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 1 Dec 2013 03:03:42 +0000 Subject: Merging r195787: ------------------------------------------------------------------------ r195787 | arnolds | 2013-11-26 14:11:23 -0800 (Tue, 26 Nov 2013) | 8 lines LoopVectorizer: Truncate i64 trip counts of i32 phis if necessary In signed arithmetic we could end up with an i64 trip count for an i32 phi. Because it is signed arithmetic we know that this is only defined if the i32 does not wrap. It is therefore safe to truncate the i64 trip count to a i32 value. Fixes PR18049. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195991 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 79f80f3..874db9f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1537,6 +1537,15 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop); assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); + // The exit count might have the type of i64 while the phi is i32. This can + // happen if we have an induction variable that is sign extended before the + // compare. The only way that we get a backedge taken count is that the + // induction variable was signed and as such will not overflow. In such a case + // truncation is legal. + if (ExitCount->getType()->getPrimitiveSizeInBits() > + IdxTy->getPrimitiveSizeInBits()) + ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); + ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); // Get the total trip count from the count by adding 1. 
ExitCount = SE->getAddExpr(ExitCount,

-- cgit v1.1 


From b50063e0ce18983513d6241c3bd638b074a98e31 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sun, 1 Dec 2013 03:36:55 +0000
Subject: Merging r196004:

------------------------------------------------------------------------
r196004 | void | 2013-11-30 19:36:07 -0800 (Sat, 30 Nov 2013) | 3 lines

Use 'unsigned char' to get this past gcc error message:

error: invalid conversion from 'unsigned char' to '{anonymous}::Sequence'
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196005 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 1cc665b..2976df6 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -432,7 +432,7 @@ namespace {
bool Partial;

/// The current position in the sequence.
- Sequence Seq : 8;
+ unsigned char Seq : 8;

/// Unidirectional information about the current sequence.
RRInfo RRI;
@@ -498,7 +498,7 @@ namespace {
}

Sequence GetSeq() const {
- return Seq;
+ return static_cast<Sequence>(Seq);
}

void ClearSequenceProgress() {
@@ -538,7 +538,8 @@ namespace {

void
PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ Seq = MergeSeqs(static_cast<Sequence>(Seq), static_cast<Sequence>(Other.Seq),
+ TopDown);
KnownPositiveRefCount &= Other.KnownPositiveRefCount;

// If we're not in a sequence (anymore), drop all associated state.

-- cgit v1.1 


From 21f315bc883057c75cedbd31b11f9924af064c2d Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Mon, 2 Dec 2013 19:14:12 +0000
Subject: Merging r196129:

------------------------------------------------------------------------
r196129 | kkhoo | 2013-12-02 10:43:59 -0800 (Mon, 02 Dec 2013) | 1 line

Conservative fix for PR17827 - don't optimize a shift + and + compare sequence where the shift is logical unless the comparison is unsigned
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196132 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Transforms/InstCombine/InstCombineCompares.cpp | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 226126b..9bb65ef 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1198,11 +1198,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
Type *AndTy = AndCST->getType(); // Type of the and.

// We can fold this as long as we can't shift unknown bits
- // into the mask. This can only happen with signed shift
- // rights, as they sign-extend.
+ // into the mask. This can happen with signed shift
+ // rights, as they sign-extend. With logical shifts,
+ // we must still make sure the comparison is not signed
+ // because we are effectively changing the
+ // position of the sign bit (PR17827).
+ // TODO: We can relax these constraints a bit more.
if (ShAmt) { - bool CanFold = Shift->isLogicalShift(); - if (!CanFold) { + bool CanFold = false; + unsigned ShiftOpcode = Shift->getOpcode(); + if (ShiftOpcode == Instruction::AShr) { // To test for the bad case of the signed shr, see if any // of the bits shifted in could be tested after the mask. uint32_t TyBits = Ty->getPrimitiveSizeInBits(); @@ -1212,6 +1217,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & AndCST->getValue()) == 0) CanFold = true; + } else if (ShiftOpcode == Instruction::Shl || + ShiftOpcode == Instruction::LShr) { + CanFold = !ICI.isSigned(); } if (CanFold) { -- cgit v1.1 From 7f6926930f48234484167e9ecce90f627a030702 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 6 Dec 2013 09:10:19 +0000 Subject: Merging r196508: ------------------------------------------------------------------------ r196508 | arnolds | 2013-12-05 07:14:40 -0800 (Thu, 05 Dec 2013) | 12 lines SLPVectorizer: An in-tree vectorized entry cannot also be a scalar external use We were creating external uses for scalar values in MustGather entries that also had a ScalarToTreeEntry (they also are present in a vectorized tuple). This meant we would keep a value 'alive' as a scalar and vectorized causing havoc. This is not necessary because when we create a MustGather vector we explicitly create external uses entries for the insertelement instructions of the MustGather vector elements. Fixes PR18129. radar://15582184 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196571 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9f18596..c72b51f 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -564,10 +564,8 @@ void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { UE = Scalar->use_end(); User != UE; ++User) { DEBUG(dbgs() << "SLP: Checking user:" << **User << ".\n"); - bool Gathered = MustGather.count(*User); - // Skip in-tree scalars that become vectors. - if (ScalarToTreeEntry.count(*User) && !Gathered) { + if (ScalarToTreeEntry.count(*User)) { DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << **User << ".\n"); int Idx = ScalarToTreeEntry[*User]; (void) Idx; @@ -1638,8 +1636,6 @@ Value *BoUpSLP::vectorizeTree() { for (Value::use_iterator User = Scalar->use_begin(), UE = Scalar->use_end(); User != UE; ++User) { DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n"); - assert(!MustGather.count(*User) && - "Replacing gathered value with undef"); assert((ScalarToTreeEntry.count(*User) || // It is legal to replace the reduction users by undef. -- cgit v1.1 From 7b7037563b12589e675c655e5d1e4f737f50fa9d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 6 Dec 2013 22:12:13 +0000 Subject: Merging r196611: ------------------------------------------------------------------------ r196611 | dexonsmith | 2013-12-06 13:48:36 -0800 (Fri, 06 Dec 2013) | 5 lines Don't use isNullValue to evaluate ConstantExpr ConstantExpr can evaluate to false even when isNullValue gives false. Fixes PR18143. 
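A concrete instance of the hazard fixed above (an illustrative sketch against the LLVM constant API, not code from the patch): a boolean ConstantExpr such as an address comparison the constant folder cannot resolve, for example when one global is extern_weak, is not the literal i1 0, so isNullValue() returns false, yet the expression may still evaluate to false once it can be resolved.

#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// icmp eq (@A, @B) as a Constant. If the folder cannot compare the two
// addresses, this stays a ConstantExpr: not known true, and not the null
// value either, so !isNullValue() must not be read as "definitely true".
static Constant *ambiguousBool(Constant *A, Constant *B) {
  return ConstantExpr::getICmp(CmpInst::ICMP_EQ, A, B);
}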
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196614 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 27f1a3e..191a101 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -699,7 +699,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); Value *InV = 0; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) + // Beware of ConstantExpr: it may eventually evaluate to getNullValue, + // even if currently isNullValue gives false. + Constant *InC = dyn_cast(PN->getIncomingValue(i)); + if (InC && !isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; else InV = Builder->CreateSelect(PN->getIncomingValue(i), -- cgit v1.1 From fccbdd27bcd604e232dfa8c77105dcb625da305e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 7 Dec 2013 21:24:29 +0000 Subject: Merging r196638: ------------------------------------------------------------------------ r196638 | arsenm | 2013-12-06 18:58:45 -0800 (Fri, 06 Dec 2013) | 1 line Fix assert with copy from global through addrspacecast ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 0a0727e..4c861b3 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -263,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) EraseInstFromFunction(*ToDelete[i]); Constant *TheSrc = cast(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); + Constant *Cast + = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); + Instruction *NewI = ReplaceInstUsesWith(AI, Cast); EraseInstFromFunction(*Copy); ++NumGlobalCopies; return NewI; -- cgit v1.1 From b7e206eab9de36cefa28ca79b560772d69cfa607 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sun, 8 Dec 2013 00:19:49 +0000 Subject: --- Reverse-merging r196668 into '.': U lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp U test/Transforms/InstCombine/addrspacecast.ll git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196705 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/Transforms') diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4c861b3..0a0727e 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ 
From fccbdd27bcd604e232dfa8c77105dcb625da305e Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sat, 7 Dec 2013 21:24:29 +0000
Subject: Merging r196638:

------------------------------------------------------------------------
r196638 | arsenm | 2013-12-06 18:58:45 -0800 (Fri, 06 Dec 2013) | 1 line

Fix assert with copy from global through addrspacecast
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196668 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0a0727e..4c861b3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -263,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
       for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
         EraseInstFromFunction(*ToDelete[i]);
       Constant *TheSrc = cast<Constant>(Copy->getSource());
-      Instruction *NewI
-        = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
-                                                           AI.getType()));
+      Constant *Cast
+        = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+      Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
       EraseInstFromFunction(*Copy);
       ++NumGlobalCopies;
       return NewI;
--
cgit v1.1

From b7e206eab9de36cefa28ca79b560772d69cfa607 Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sun, 8 Dec 2013 00:19:49 +0000
Subject: --- Reverse-merging r196668 into '.':
U    lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
U    test/Transforms/InstCombine/addrspacecast.ll

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196705 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 4c861b3..0a0727e 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -263,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
       for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
         EraseInstFromFunction(*ToDelete[i]);
       Constant *TheSrc = cast<Constant>(Copy->getSource());
-      Constant *Cast
-        = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
-      Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
+      Instruction *NewI
+        = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+                                                           AI.getType()));
       EraseInstFromFunction(*Copy);
       ++NumGlobalCopies;
       return NewI;
--
cgit v1.1

From 209178dacacb5c254926a9d8c72933f23feced9f Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sun, 8 Dec 2013 00:25:40 +0000
Subject: Merging r196638:

------------------------------------------------------------------------
r196638 | arsenm | 2013-12-06 18:58:45 -0800 (Fri, 06 Dec 2013) | 1 line

Fix assert with copy from global through addrspacecast
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196709 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0a0727e..4c861b3 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -263,9 +263,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
       for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
         EraseInstFromFunction(*ToDelete[i]);
       Constant *TheSrc = cast<Constant>(Copy->getSource());
-      Instruction *NewI
-        = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
-                                                           AI.getType()));
+      Constant *Cast
+        = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
+      Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
      EraseInstFromFunction(*Copy);
       ++NumGlobalCopies;
       return NewI;
--
cgit v1.1
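
The fix that finally stuck (merged, reverted, then re-merged above) picks the cast opcode by address space. A rough sketch of the distinction getPointerBitCastOrAddrSpaceCast encodes (castPointer is a hypothetical helper invented here; the real entry point also handles vectors of pointers; 3.4-era headers assumed):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // bitcast requires matching address spaces; crossing address spaces
    // needs the addrspacecast opcode introduced in LLVM 3.4.
    static Constant *castPointer(Constant *Src, PointerType *DstTy) {
      PointerType *SrcTy = cast<PointerType>(Src->getType());
      if (SrcTy->getAddressSpace() != DstTy->getAddressSpace())
        return ConstantExpr::getAddrSpaceCast(Src, DstTy);
      return ConstantExpr::getBitCast(Src, DstTy);
    }

This is why the alloca copy-forwarding asserted when the source global lived in a non-default address space: getBitCast insists the address spaces match.
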
From c877b10446669bf107c19cab78b920ce9cffb989 Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Mon, 9 Dec 2013 21:00:02 +0000
Subject: Merging r195505:

------------------------------------------------------------------------
r195505 | mren | 2013-11-22 14:06:31 -0800 (Fri, 22 Nov 2013) | 8 lines

Debug Info: move StripDebugInfo from StripSymbols.cpp to DebugInfo.cpp.

We can share the implementation between StripSymbols and dropping debug info
for metadata versions that do not match.

Also update the comments to match the implementation. A follow-on patch will
drop the "Debug Info Version" module flag in StripDebugInfo.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@196816 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/StripSymbols.cpp | 51 ------------------------------------
 1 file changed, 51 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 1792aa8..c4f5cfc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -231,57 +231,6 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
   return true;
 }
 
-// StripDebugInfo - Strip debug info in the module if it exists.
-// To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and
-// llvm.dbg.region.end calls, and any globals they point to if now dead.
-static bool StripDebugInfo(Module &M) {
-
-  bool Changed = false;
-
-  // Remove all of the calls to the debugger intrinsics, and remove them from
-  // the module.
-  if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
-    while (!Declare->use_empty()) {
-      CallInst *CI = cast<CallInst>(Declare->use_back());
-      CI->eraseFromParent();
-    }
-    Declare->eraseFromParent();
-    Changed = true;
-  }
-
-  if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
-    while (!DbgVal->use_empty()) {
-      CallInst *CI = cast<CallInst>(DbgVal->use_back());
-      CI->eraseFromParent();
-    }
-    DbgVal->eraseFromParent();
-    Changed = true;
-  }
-
-  for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
-         NME = M.named_metadata_end(); NMI != NME;) {
-    NamedMDNode *NMD = NMI;
-    ++NMI;
-    if (NMD->getName().startswith("llvm.dbg.")) {
-      NMD->eraseFromParent();
-      Changed = true;
-    }
-  }
-
-  for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
-    for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
-         ++FI)
-      for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
-           ++BI) {
-        if (!BI->getDebugLoc().isUnknown()) {
-          Changed = true;
-          BI->setDebugLoc(DebugLoc());
-        }
-      }
-
-  return Changed;
-}
-
 bool StripSymbols::runOnModule(Module &M) {
   bool Changed = false;
   Changed |= StripDebugInfo(M);
--
cgit v1.1

From dd36ddfaec578968b163fc4bbb7148921084aa6e Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Sat, 14 Dec 2013 08:04:09 +0000
Subject: Merging r197178:

------------------------------------------------------------------------
r197178 | hfinkel | 2013-12-12 12:45:24 -0800 (Thu, 12 Dec 2013) | 9 lines

Fix a use-after-free error in GlobalOpt CleanupConstantGlobalUsers

GlobalOpt's CleanupConstantGlobalUsers function uses a worklist array to
manage constant users to be visited. The pointers in this array need to be
weak handles because when we delete a constant array, we may also be holding
a pointer to one of its elements (or an element of one of its elements if
we're dealing with an array of arrays) in the worklist.

Fixes PR17347.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@197322 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/IPO/GlobalOpt.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 7e91897..2ea89a1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -267,9 +268,17 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
 static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                        DataLayout *TD, TargetLibraryInfo *TLI) {
   bool Changed = false;
-  SmallVector<User*, 8> WorkList(V->use_begin(), V->use_end());
+  // Note that we need to use a weak value handle for the worklist items. When
+  // we delete a constant array, we may also be holding a pointer to one of its
+  // elements (or an element of one of its elements if we're dealing with an
+  // array of arrays) in the worklist.
+  SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
   while (!WorkList.empty()) {
-    User *U = WorkList.pop_back_val();
+    Value *UV = WorkList.pop_back_val();
+    if (!UV)
+      continue;
+
+    User *U = cast<User>(UV);
 
     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       if (Init) {
--
cgit v1.1
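
The idiom above generalizes: any worklist whose entries can be deleted by the processing of earlier entries should hold WeakVH, which nulls itself when its Value is destroyed. A condensed sketch of the pattern outside the pass (visitAll is a hypothetical name for this example; 3.4-era headers assumed):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/User.h"
    #include "llvm/IR/Value.h"
    #include "llvm/Support/ValueHandle.h"
    using namespace llvm;

    static void visitAll(Value *V) {
      // Each WeakVH tracks its Value and becomes null if that Value is
      // deleted while still queued, so stale entries are skipped rather
      // than dereferenced.
      SmallVector<WeakVH, 8> WorkList(V->use_begin(), V->use_end());
      while (!WorkList.empty()) {
        Value *UV = WorkList.pop_back_val();
        if (!UV)
          continue;
        User *U = cast<User>(UV);
        // ... process U; processing may safely delete other queued users ...
        (void)U;
      }
    }

A raw User* worklist has no such self-invalidation, which is exactly how the use-after-free in PR17347 arose.
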
From e39b15195a3607ee708be9d105b5fc591b4665dd Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Tue, 17 Dec 2013 01:28:35 +0000
Subject: Merging r197449:

------------------------------------------------------------------------
r197449 | arnolds | 2013-12-16 17:11:01 -0800 (Mon, 16 Dec 2013) | 7 lines

LoopVectorizer: Don't if-convert constant expressions that can trap

A phi node operand or an instruction operand could be a constant expression
that can trap (division). Check that we don't vectorize such cases.

PR16729
radar://15653590
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@197453 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Vectorize/LoopVectorize.cpp | 32 +++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

(limited to 'lib/Transforms')

diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 874db9f..5e75871 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2781,6 +2781,23 @@ void InnerLoopVectorizer::updateAnalysis() {
   DEBUG(DT->verifyAnalysis());
 }
 
+/// \brief Check whether it is safe to if-convert this phi node.
+///
+/// Phi nodes with constant expressions that can trap are not safe to if
+/// convert.
+static bool canIfConvertPHINodes(BasicBlock *BB) {
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    PHINode *Phi = dyn_cast<PHINode>(I);
+    if (!Phi)
+      return true;
+    for (unsigned p = 0, e = Phi->getNumIncomingValues(); p != e; ++p)
+      if (Constant *C = dyn_cast<Constant>(Phi->getIncomingValue(p)))
+        if (C->canTrap())
+          return false;
+  }
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   if (!EnableIfConversion)
     return false;
@@ -2807,6 +2824,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   }
 
   // Collect the blocks that need predication.
+  BasicBlock *Header = TheLoop->getHeader();
   for (Loop::block_iterator BI = TheLoop->block_begin(),
        BE = TheLoop->block_end(); BI != BE; ++BI) {
     BasicBlock *BB = *BI;
@@ -2816,8 +2834,12 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
       return false;
 
     // We must be able to predicate all blocks that need to be predicated.
-    if (blockNeedsPredication(BB) && !blockCanBePredicated(BB, SafePointes))
+    if (blockNeedsPredication(BB)) {
+      if (!blockCanBePredicated(BB, SafePointes))
+        return false;
+    } else if (BB != Header && !canIfConvertPHINodes(BB))
       return false;
+  }
 
   // We can if-convert this loop.
@@ -4371,6 +4393,14 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
     if (it->mayWriteToMemory() || it->mayThrow())
      return false;
 
+    // Check that we don't have a constant expression that can trap as operand.
+    for (Instruction::op_iterator OI = it->op_begin(), OE = it->op_end();
+         OI != OE; ++OI) {
+      if (Constant *C = dyn_cast<Constant>(*OI))
+        if (C->canTrap())
+          return false;
+    }
+
     // The instructions below can trap.
     switch (it->getOpcode()) {
     default: continue;
--
cgit v1.1
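
For a concrete feel for what canTrap() flags here, consider a division whose divisor is a constant expression that only becomes a number once the linker assigns addresses. A speculative sketch (demoTrappingConstExpr and @g are invented for the example; 3.4-era headers and constructor signatures assumed):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // Build udiv (i32 1, i32 ptrtoint (i32* @g to i32)): the divisor is
    // unknown until @g gets an address, so it could be zero, and the
    // expression may trap if evaluated unconditionally.
    static bool demoTrappingConstExpr(Module &M) {
      Type *I32 = Type::getInt32Ty(M.getContext());
      GlobalVariable *G = new GlobalVariable(M, I32, /*isConstant=*/false,
                                             GlobalValue::ExternalLinkage,
                                             /*Initializer=*/0, "g");
      Constant *Div = ConstantExpr::getUDiv(ConstantInt::get(I32, 1),
                                            ConstantExpr::getPtrToInt(G, I32));
      return Div->canTrap(); // expected to be true
    }

If-conversion would hoist such an operand past the branch that guards it, which is precisely what the blockCanBePredicated and canIfConvertPHINodes checks above refuse to allow.
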