aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
authorStephen Hines <srhines@google.com>2012-08-23 19:08:53 -0700
committerStephen Hines <srhines@google.com>2012-08-23 19:08:53 -0700
commit31675153bd2d7617db8cb6aeb58054934c7b9f73 (patch)
treec1970fcebc736d4f731db0559a79a7ac5cb0f8bf /lib/Transforms
parent416bb6a168a9316547db6ce3909c515f70a84f52 (diff)
parent75dd7f0c4a2b3fb9e9d4d5a0517591810c57ed92 (diff)
downloadexternal_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.zip
external_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.tar.gz
external_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.tar.bz2
Merge branch 'upstream' into merge_2
Conflicts: lib/Target/ARM/ARMCodeEmitter.cpp Change-Id: I6702d340c733e9721499b5d85b13b96ad9c14eb5
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp7
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp12
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp41
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp172
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp15
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp29
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp372
-rw-r--r--lib/Transforms/Instrumentation/MaximumSpanningTree.h53
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp70
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp13
-rw-r--r--lib/Transforms/Scalar/GVN.cpp103
-rw-r--r--lib/Transforms/Scalar/LICM.cpp37
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp3
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp2
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp189
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp120
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp24
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp126
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp7
20 files changed, 855 insertions, 545 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 60ce958..6d950d2 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -352,7 +352,8 @@ static bool IsSafeComputationToRemove(Value *V) {
return true;
if (!V->hasOneUse())
return false;
- if (isa<LoadInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+ if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+ isa<GlobalValue>(V))
return false;
if (isAllocationFn(V))
return true;
@@ -442,12 +443,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
+ if (isAllocationFn(I))
+ break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
I->eraseFromParent();
I = J;
- } while (!isAllocationFn(I));
+ } while (1);
I->eraseFromParent();
}
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index d8e8cf7..80bfc1c 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -27,6 +27,7 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/TypeFinder.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
@@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
// Strip any named types of their names.
static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
- std::vector<StructType*> StructTypes;
- M.findUsedStructTypes(StructTypes);
+ TypeFinder StructTypes;
+ StructTypes.run(M, false);
for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
StructType *STy = StructTypes[i];
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c1d9d01..cbe1ca4 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
- unsigned Size = MemOpLength->getZExtValue();
- if (Size == 0) return MI; // Delete this mem transfer.
+ uint64_t Size = MemOpLength->getLimitedValue();
+ assert(Size && "0-sized memory transfering should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return 0;
- uint64_t Len = LenC->getZExtValue();
+ uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
-
- // If the length is zero, this is a no-op
- if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+ assert(Len && "0-sized memory setting should be removed already.");
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
@@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
if (CI->getCalledFunction() == 0) return 0;
InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD);
+ Simplifier.fold(CI, TD, TLI);
return Simplifier.NewInstruction;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7076d88..c3fc18c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
case ICmpInst::ICMP_UGE:
// (float)int >= -4.4 --> true
// (float)int >= 4.4 --> int > 4
- if (!RHS.isNegative())
+ if (RHS.isNegative())
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
Pred = ICmpInst::ICMP_UGT;
break;
@@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return Res;
}
break;
+ case Instruction::Call: {
+ CallInst *CI = cast<CallInst>(LHSI);
+ LibFunc::Func Func;
+ // Various optimization for fabs compared with zero.
+ if (RHSC->isNullValue() && CI->getCalledFunction() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->has(Func)) {
+ if (Func == LibFunc::fabs || Func == LibFunc::fabsf ||
+ Func == LibFunc::fabsl) {
+ switch (I.getPredicate()) {
+ default: break;
+ // fabs(x) < 0 --> false
+ case FCmpInst::FCMP_OLT:
+ return ReplaceInstUsesWith(I, Builder->getFalse());
+ // fabs(x) > 0 --> x != 0
+ case FCmpInst::FCMP_OGT:
+ return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) <= 0 --> x == 0
+ case FCmpInst::FCMP_OLE:
+ return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) >= 0 --> !isnan(x)
+ case FCmpInst::FCMP_OGE:
+ return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0),
+ RHSC);
+ // fabs(x) == 0 --> x == 0
+ // fabs(x) != 0 --> x != 0
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UNE:
+ return new FCmpInst(I.getPredicate(), CI->getArgOperand(0),
+ RHSC);
+ }
+ }
+ }
+ }
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index c485844..6ecb4c5 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,7 +20,154 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
+
+/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
+/// some part of a constant global variable. This intentionally only accepts
+/// constant expressions because we can't rewrite arbitrary instructions.
+static bool pointsToConstantGlobal(Value *V) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return GV->isConstant();
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr)
+ return pointsToConstantGlobal(CE->getOperand(0));
+ return false;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
+/// pointer to an alloca. Ignore any reads of the pointer, return false if we
+/// see any stores or other unknown uses. If we see pointer arithmetic, keep
+/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
+/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
+/// the alloca, and if the source pointer is a pointer to a constant global, we
+/// can optimize this.
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ SmallVectorImpl<Instruction *> &ToDelete,
+ bool IsOffset = false) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
+ User *U = cast<Instruction>(*UI);
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
+
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ // If uses of the bitcast are ok, we are ok.
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ return false;
+ continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+ IsOffset || !GEP->hasAllZeroIndices()))
+ return false;
+ continue;
+ }
+
+ if (CallSite CS = U) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(UI))
+ continue;
+
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ unsigned ArgNo = CS.getArgumentNo(UI);
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
+
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
+ }
+
+ // If this is isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+ if (MI == 0)
+ return false;
+
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (UI.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
+
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
+
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
+
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (UI.getOperandNo() != 0) return false;
+
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
+
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
+ return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
+/// modified by a copy from a constant global. If we can prove this, we can
+/// replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVectorImpl<Instruction *> &ToDelete) {
+ MemTransferInst *TheCopy = 0;
+ if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+ return TheCopy;
+ return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == Instruction::BitCast ||
+ (CE->getOpcode() == Instruction::GetElementPtr &&
+ cast<GEPOperator>(CE)->hasAllZeroIndices()))
+ return getPointeeAlignment(CE->getOperand(0), TD);
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ if (!GV->isDeclaration())
+ return TD.getPreferredAlignment(GV);
+
+ if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
+ return TD.getABITypeAlignment(PT->getElementType());
+
+ return 0;
+}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
@@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
}
}
+ // Check to see if this allocation is only modified by a memcpy/memmove from
+ // a constant global whose alignment is equal to or exceeds that of the
+ // allocation. If this is the case, we can change all users to use
+ // the constant global instead. This is commonly produced by the CFE by
+ // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
+ // is only subsequently read.
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
+ if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
+ DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ EraseInstFromFunction(*ToDelete[i]);
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
+ Instruction *NewI
+ = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
+ AI.getType()));
+ EraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ }
+ }
+
// At last, use the generic allocation site handler to aggressively remove
// unused allocas.
return visitAllocSite(AI);
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb9945b..291e800 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return 0;
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return 0;
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
}
@@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
+ unsigned VWidth = VecTy->getNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) {
+ if (V != &SI)
+ return ReplaceInstUsesWith(SI, V);
+ return &SI;
+ }
+ }
+
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 125c74a..54be8ed 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
break;
}
+ case Instruction::Select: {
+ APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts);
+ if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) {
+ for (unsigned i = 0; i < VWidth; i++) {
+ if (CV->getAggregateElement(i)->isNullValue())
+ LeftDemanded.clearBit(i);
+ else
+ RightDemanded.clearBit(i);
+ }
+ }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
+
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
+ UndefElts2, Depth+1);
+ if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
+
+ // Output elements are undefined if both are undefined.
+ UndefElts &= UndefElts2;
+ break;
+ }
case Instruction::BitCast: {
// Vector->vector casts only.
VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
@@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// like undef&0. The result is known zero, not undef.
UndefElts &= UndefElts2;
break;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
+ UndefElts, Depth+1);
+ if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
+ break;
case Instruction::Call: {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3368026..06f4d2f 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
+static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
@@ -86,8 +88,8 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
cl::Hidden, cl::init(true));
-static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks",
- cl::desc("merge __asan_report_ callbacks to create fewer BBs"),
+static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"),
cl::Hidden, cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
@@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInitializers("asan-initialization-order",
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
static cl::opt<bool> ClMemIntrin("asan-memintrin",
cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -fasan-blacklist.
@@ -145,24 +149,11 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
namespace {
-/// When the crash callbacks are merged, they receive some amount of arguments
-/// that are merged in a PHI node. This struct represents arguments from one
-/// call site.
-struct CrashArg {
- Value *Arg1;
- Value *Arg2;
-};
-
/// An object of this type is created while instrumenting every function.
struct AsanFunctionContext {
- AsanFunctionContext(Function &Function) : F(Function), CrashBlock() { }
+ AsanFunctionContext(Function &Function) : F(Function) { }
Function &F;
- // These are initially zero. If we require at least one call to
- // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created.
- BasicBlock *CrashBlock[2][kNumberOfAccessSizes];
- typedef SmallVector<CrashArg, 8> CrashArgsVec;
- CrashArgsVec CrashArgs[2][kNumberOfAccessSizes];
};
/// AddressSanitizer: instrument the code in module to find memory bugs.
@@ -175,7 +166,7 @@ struct AddressSanitizer : public ModulePass {
Value *Addr, uint32_t TypeSize, bool IsWrite);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
- Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC,
+ Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex);
bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
@@ -184,6 +175,8 @@ struct AddressSanitizer : public ModulePass {
Instruction *InsertBefore, bool IsWrite);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool handleFunction(Module &M, Function &F);
+ void createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool poisonStackInFunction(Module &M, Function &F);
virtual bool runOnModule(Module &M);
@@ -191,7 +184,6 @@ struct AddressSanitizer : public ModulePass {
static char ID; // Pass identification, replacement for typeid
private:
-
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -207,9 +199,12 @@ struct AddressSanitizer : public ModulePass {
}
Function *checkInterfaceFunction(Constant *FuncOrBitcast);
+ bool ShouldInstrumentGlobal(GlobalVariable *G);
void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
Value *ShadowBase, bool DoPoison);
bool LooksLikeCodeInBug11395(Instruction *I);
+ void FindDynamicInitializers(Module &M);
+ bool HasDynamicInitializer(GlobalVariable *G);
LLVMContext *C;
TargetData *TD;
@@ -226,6 +221,7 @@ struct AddressSanitizer : public ModulePass {
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
InlineAsm *EmptyAsm;
+ SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals;
};
} // namespace
@@ -267,24 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
// ThenBlock
// Tail
//
-// If ThenBlock is zero, a new block is created and its terminator is returned.
-// Otherwize 0 is returned.
-static BranchInst *splitBlockAndInsertIfThen(Value *Cmp,
- BasicBlock *ThenBlock = 0) {
+// ThenBlock block is created and its terminator is returned.
+// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
+// it is terminated with BranchInst to Tail.
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
TerminatorInst *HeadOldTerm = Head->getTerminator();
- BranchInst *CheckTerm = 0;
- if (!ThenBlock) {
- LLVMContext &C = Head->getParent()->getParent()->getContext();
- ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ LLVMContext &C = Head->getParent()->getParent()->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
CheckTerm = BranchInst::Create(Tail, ThenBlock);
- }
BranchInst *HeadNewTerm =
BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
return CheckTerm;
}
@@ -336,7 +332,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(Cmp);
+ InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
}
instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
@@ -371,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
return NULL;
}
+void AddressSanitizer::FindDynamicInitializers(Module& M) {
+ // Clang generates metadata identifying all dynamically initialized globals.
+ NamedMDNode *DynamicGlobals =
+ M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
+ if (!DynamicGlobals)
+ return;
+ for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
+ MDNode *MDN = DynamicGlobals->getOperand(i);
+ assert(MDN->getNumOperands() == 1);
+ Value *VG = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely, in which case we
+ // cannot instrument access to it.
+ if (!VG)
+ continue;
+
+ GlobalVariable *G = cast<GlobalVariable>(VG);
+ DynamicallyInitializedGlobals.insert(G);
+ }
+}
+// Returns true if a global variable is initialized dynamically in this TU.
+bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
+ return DynamicallyInitializedGlobals.count(G);
+}
+
void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
bool IsWrite;
Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
assert(Addr);
- if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
- // We are accessing a global scalar variable. Nothing to catch here.
- return;
+ if (ClOpt && ClOptGlobals) {
+ if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ if (!ClInitializers)
+ return;
+ // If a global variable does not have dynamic initialization we don't
+ // have to instrument it. However, if a global has external linkage, we
+ // assume it has dynamic initialization, as it may have an initializer
+ // in a different TU.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ !HasDynamicInitializer(G))
+ return;
+ }
}
+
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
@@ -407,15 +439,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
}
Instruction *AddressSanitizer::generateCrashCode(
- BasicBlock *BB, Value *Addr, Value *PC,
+ Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex) {
- IRBuilder<> IRB(BB->getFirstNonPHI());
- CallInst *Call;
- if (PC)
- Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex],
- Addr, PC);
- else
- Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+ IRBuilder<> IRB(InsertBefore);
+ CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
+ Addr);
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
@@ -436,7 +464,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte = IRB.CreateIntCast(
- LastAccessedByte, IRB.getInt8Ty(), false);
+ LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
@@ -456,112 +484,129 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
-
- BasicBlock *CrashBlock = 0;
- if (ClMergeCallbacks) {
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- BasicBlock **Cached = &AFC.CrashBlock[IsWrite][AccessSizeIndex];
- if (!*Cached) {
- std::string BBName("crash_bb-");
- BBName += (IsWrite ? "w-" : "r-") + itostr(1 << AccessSizeIndex);
- BasicBlock *BB = BasicBlock::Create(*C, BBName, &AFC.F);
- new UnreachableInst(*C, BB);
- *Cached = BB;
- }
- CrashBlock = *Cached;
- // We need to pass the PC as the second parameter to __asan_report_*.
- // There are few problems:
- // - Some architectures (e.g. x86_32) don't have a cheap way to get the PC.
- // - LLVM doesn't have the appropriate intrinsic.
- // For now, put a random number into the PC, just to allow experiments.
- Value *PC = ConstantInt::get(IntptrTy, rand());
- CrashArg Arg = {AddrLong, PC};
- AFC.CrashArgs[IsWrite][AccessSizeIndex].push_back(Arg);
- } else {
- CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F);
- new UnreachableInst(*C, CrashBlock);
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- Instruction *Crash =
- generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex);
- Crash->setDebugLoc(OrigIns->getDebugLoc());
- }
-
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
size_t Granularity = 1 << MappingScale;
- if (TypeSize < 8 * Granularity) {
- BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp);
- assert(CheckTerm->isUnconditional());
+ TerminatorInst *CrashTerm = 0;
+
+ if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+ assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+ CrashTerm = new UnreachableInst(*C, CrashBlock);
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
- splitBlockAndInsertIfThen(Cmp, CrashBlock);
+ CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
+ }
+
+ Instruction *Crash =
+ generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
+
+void AddressSanitizer::createInitializerPoisonCalls(Module &M,
+ Value *FirstAddr,
+ Value *LastAddr) {
+ // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
+ Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // If that function is not present, this TU contains no globals, or they have
+ // all been optimized away
+ if (!GlobalInit)
+ return;
+
+ // Set up the arguments to our poison/unpoison functions.
+ IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+
+ // Declare our poisoning and unpoisoning functions.
+ Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
+ Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL));
+ AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
+
+ // Add a call to poison all external globals before the given function starts.
+ IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr);
+
+ // Add calls to unpoison all globals before each return instruction.
+ for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
+ I != E; ++I) {
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ CallInst::Create(AsanUnpoisonGlobals, "", RI);
+ }
}
}
+bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G);
+
+ if (!Ty->isSized()) return false;
+ if (!G->hasInitializer()) return false;
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR type linkages since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ return false;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ return false;
+ // For now, just ignore this Alloca if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) return false;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ return false;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ return false;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ return false;
+ }
+ }
+
+ return true;
+}
+
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
bool AddressSanitizer::insertGlobalRedzones(Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
- for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
- E = M.getGlobalList().end(); G != E; ++G) {
- Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
-
- if (!Ty->isSized()) continue;
- if (!G->hasInitializer()) continue;
- // Touch only those globals that will not be defined in other modules.
- // Don't handle ODR type linkages since other modules may be built w/o asan.
- if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
- G->getLinkage() != GlobalVariable::PrivateLinkage &&
- G->getLinkage() != GlobalVariable::InternalLinkage)
- continue;
- // Two problems with thread-locals:
- // - The address of the main thread's copy can't be computed at link-time.
- // - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- continue;
- // For now, just ignore this Alloca if the alignment is large.
- if (G->getAlignment() > RedzoneSize) continue;
-
- // Ignore all the globals with the names starting with "\01L_OBJC_".
- // Many of those are put into the .cstring section. The linker compresses
- // that section by removing the spare \0s after the string terminator, so
- // our redzones get broken.
- if ((G->getName().find("\01L_OBJC_") == 0) ||
- (G->getName().find("\01l_OBJC_") == 0)) {
- DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
- continue;
- }
-
- if (G->hasSection()) {
- StringRef Section(G->getSection());
- // Ignore the globals from the __OBJC section. The ObjC runtime assumes
- // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
- // them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
- DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
- continue;
- }
- // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
- // Constant CFString instances are compiled in the following way:
- // -- the string buffer is emitted into
- // __TEXT,__cstring,cstring_literals
- // -- the constant NSConstantString structure referencing that buffer
- // is placed into __DATA,__cfstring
- // Therefore there's no point in placing redzones into __DATA,__cfstring.
- // Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
- DEBUG(dbgs() << "Ignoring CFString: " << *G);
- continue;
- }
- }
-
- GlobalsToChange.push_back(G);
+ for (Module::GlobalListType::iterator G = M.global_begin(),
+ E = M.global_end(); G != E; ++G) {
+ if (ShouldInstrumentGlobal(G))
+ GlobalsToChange.push_back(G);
}
size_t n = GlobalsToChange.size();
@@ -572,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
- SmallVector<Constant *, 16> Initializers(n);
+ IntptrTy, IntptrTy,
+ IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n), DynamicInit;
IRBuilder<> IRB(CtorInsertBefore);
+ if (ClInitializers)
+ FindDynamicInitializers(M);
+
+ // The addresses of the first and last dynamically initialized globals in
+ // this TU. Used in initialization order checking.
+ Value *FirstDynamic = 0, *LastDynamic = 0;
+
for (size_t i = 0; i < n; i++) {
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
@@ -587,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
uint64_t RightRedzoneSize = RedzoneSize +
(RedzoneSize - (SizeInBytes % RedzoneSize));
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+ // Determine whether this global should be poisoned in initialization.
+ bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -621,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
+
+ // Populate the first and last globals declared in this TU.
+ if (ClInitializers && GlobalHasDynamicInitializer) {
+ LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
+ if (FirstDynamic == 0)
+ FirstDynamic = LastDynamic;
+ }
+
DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
}
@@ -630,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (ClInitializers && FirstDynamic && LastDynamic)
+ createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
+
Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, NULL));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
IRB.CreateCall2(AsanRegisterGlobals,
@@ -694,12 +764,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
std::string FunctionName = std::string(kAsanReportErrorTemplate) +
(AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
// If we are merging crash callbacks, they have two parameters.
- if (ClMergeCallbacks)
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
- M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy,
- IntptrTy, NULL));
- else
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
+ AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>(
M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
@@ -845,33 +910,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
NumInstrumented++;
}
- // Create PHI nodes and crash callbacks if we are merging crash callbacks.
- if (NumInstrumented) {
- for (size_t IsWrite = 0; IsWrite <= 1; IsWrite++) {
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- BasicBlock *BB = AFC.CrashBlock[IsWrite][AccessSizeIndex];
- if (!BB) continue;
- assert(ClMergeCallbacks);
- AsanFunctionContext::CrashArgsVec &Args =
- AFC.CrashArgs[IsWrite][AccessSizeIndex];
- IRBuilder<> IRB(BB->getFirstNonPHI());
- size_t n = Args.size();
- PHINode *PN1 = IRB.CreatePHI(IntptrTy, n);
- PHINode *PN2 = IRB.CreatePHI(IntptrTy, n);
- // We need to match crash parameters and the predecessors.
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- PI != PE; ++PI) {
- n--;
- PN1->addIncoming(Args[n].Arg1, *PI);
- PN2->addIncoming(Args[n].Arg2, *PI);
- }
- assert(n == 0);
- generateCrashCode(BB, PN1, PN2, IsWrite, AccessSizeIndex);
- }
- }
- }
-
DEBUG(dbgs() << F);
bool ChangedStack = poisonStackInFunction(M, F);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index f76c77e..a4bb5a6 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -26,30 +26,6 @@ namespace llvm {
/// The type parameter T determines the type of the nodes of the graph.
template <typename T>
class MaximumSpanningTree {
-
- // A comparing class for comparing weighted edges.
- template <typename CT>
- struct EdgeWeightCompare {
- bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
- typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
- if (X.second > Y.second) return true;
- if (X.second < Y.second) return false;
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
- if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
- if (BBX->size() > BBY->size()) return true;
- if (BBX->size() < BBY->size()) return false;
- }
- }
- return false;
- }
- };
-
public:
typedef std::pair<const T*, const T*> Edge;
typedef std::pair<Edge, double> EdgeWeight;
@@ -59,6 +35,33 @@ namespace llvm {
MaxSpanTree MST;
+ private:
+ // A comparing class for comparing weighted edges.
+ struct EdgeWeightCompare {
+ static bool getBlockSize(const T *X) {
+ const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+ return BB ? BB->size() : 0;
+ }
+
+ bool operator()(EdgeWeight X, EdgeWeight Y) const {
+ if (X.second > Y.second) return true;
+ if (X.second < Y.second) return false;
+
+ // Equal edge weights: break ties by comparing block sizes.
+ size_t XSizeA = getBlockSize(X.first.first);
+ size_t YSizeA = getBlockSize(Y.first.first);
+ if (XSizeA > YSizeA) return true;
+ if (XSizeA < YSizeA) return false;
+
+ size_t XSizeB = getBlockSize(X.first.second);
+ size_t YSizeB = getBlockSize(Y.first.second);
+ if (XSizeB > YSizeB) return true;
+ if (XSizeB < YSizeB) return false;
+
+ return false;
+ }
+ };
+
public:
static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
/// spanning tree.
MaximumSpanningTree(EdgeWeights &EdgeVector) {
- std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+ std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
// Create spanning tree, Forest contains a special data structure
// that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 277c4d5..a8deda8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
-// FIXME: Remove this abomination once all of the tests pass without it!
-static cl::opt<bool> DisableDeleteDeadBlocks(
- "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
- cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
-
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@@ -116,6 +111,7 @@ namespace {
}
private:
+ bool EliminateFallThrough(Function &F);
bool EliminateMostlyEmptyBlocks(Function &F);
bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void EliminateMostlyEmptyBlock(BasicBlock *BB);
@@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
WorkList.insert(*II);
}
- if (!DisableDeleteDeadBlocks)
- for (SmallPtrSet<BasicBlock*, 8>::iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
- DeleteDeadBlock(*I);
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
+
+ // Merge pairs of basic blocks with unconditional branches, connected by
+ // a single edge.
+ if (EverMadeChange || MadeChange)
+ MadeChange |= EliminateFallThrough(F);
if (MadeChange)
ModifiedDT = true;
@@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+ bool Changed = false;
+ // Scan all of the blocks in the function, except for the entry block.
+ for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
+ BasicBlock *BB = I++;
+ // If the destination block has a single pred, then this is a trivial
+ // edge, just collapse it.
+ BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+ if (!SinglePred || SinglePred == BB) continue;
+
+ BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+ if (Term && !Term->isConditional()) {
+ Changed = true;
+ DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+ // Remember if SinglePred was the entry block of the function.
+ // If so, we will need to move BB back to the entry position.
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ MergeBasicBlockIntoOnlyPred(BB, this);
+
+ if (isEntry && BB != &BB->getParent()->getEntryBlock())
+ BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+ // We have erased a block. Update the iterator.
+ I = BB;
+ }
+ }
+ return Changed;
+}
+
/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
@@ -610,7 +643,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// that have the default "don't know" as the objectsize. Anything else
// should be left alone.
CodeGenPrepareFortifiedLibCalls Simplifier;
- return Simplifier.fold(CI, TD);
+ return Simplifier.fold(CI, TD, TLInfo);
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
@@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (!TLI)
return false;
+ PHINode *PN = 0;
+ BitCastInst *BCI = 0;
Value *V = RI->getReturnValue();
- PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL;
- if (V && !PN)
- return false;
+ if (V) {
+ BCI = dyn_cast<BitCastInst>(V);
+ if (BCI)
+ V = BCI->getOperand(0);
+
+ PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return false;
+ }
BasicBlock *BB = RI->getParent();
if (PN && PN->getParent() != BB)
@@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
if (PN) {
BasicBlock::iterator BI = BB->begin();
do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+ if (&*BI == BCI)
+ // Also skip over the bitcast.
+ ++BI;
if (&*BI != RI)
return false;
} else {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 5eff0e5..8b1283f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -378,7 +378,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
- Later.Size > Earlier.Size &&
+ Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
return OverwriteComplete;
@@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
- if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) {
+ if (isa<AllocaInst>(BBI)) {
+ // Remove allocas from the list of dead stack objects; there can't be
+ // any references before the definition.
DeadStackObjects.remove(BBI);
continue;
}
if (CallSite CS = cast<Value>(BBI)) {
+ // Remove allocation function calls from the list of dead stack objects;
+ // there can't be any references before the definition.
+ if (isAllocLikeFn(BBI))
+ DeadStackObjects.remove(BBI);
+
// If this call does not access memory, it can't be loading any of our
// pointers.
if (AA->doesNotAccessMemory(CS))
@@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// If all of the allocas were clobbered by the call then we're not going
// to find anything else to process.
if (DeadStackObjects.empty())
- return MadeChange;
+ break;
continue;
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 140864d..4822fd0 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -512,7 +512,7 @@ namespace {
/// have that value number. Use findLeader to query it.
struct LeaderTableEntry {
Value *Val;
- BasicBlock *BB;
+ const BasicBlock *BB;
LeaderTableEntry *Next;
};
DenseMap<uint32_t, LeaderTableEntry> LeaderTable;
@@ -542,7 +542,7 @@ namespace {
private:
/// addToLeaderTable - Push a new Value to the LeaderTable onto the list for
/// its value number.
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) {
+ void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
LeaderTableEntry &Curr = LeaderTable[N];
if (!Curr.Val) {
Curr.Val = V;
@@ -608,13 +608,13 @@ namespace {
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
bool performPRE(Function &F);
- Value *findLeader(BasicBlock *BB, uint32_t num);
+ Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root);
- bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root);
+ const BasicBlockEdge &Root);
+ bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
};
char GVN::ID = 0;
@@ -1977,7 +1977,7 @@ bool GVN::processLoad(LoadInst *L) {
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
+Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return 0;
@@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) {
/// use is dominated by the given basic block. Returns the number of uses that
/// were replaced.
unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
- BasicBlock *Root) {
+ const BasicBlockEdge &Root) {
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
Use &U = (UI++).getUse();
- // If From occurs as a phi node operand then the use implicitly lives in the
- // corresponding incoming block. Otherwise it is the block containing the
- // user that must be dominated by Root.
- BasicBlock *UsingBlock;
- if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
- UsingBlock = PN->getIncomingBlock(U);
- else
- UsingBlock = cast<Instruction>(U.getUser())->getParent();
-
- if (DT->dominates(Root, UsingBlock)) {
+ if (DT->dominates(Root, U)) {
U.set(To);
++Count;
}
@@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
return Count;
}
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
+/// true if every path from the entry block to 'Dst' passes via this edge. In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
+ DominatorTree *DT) {
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+ const BasicBlock *Src = E.getStart();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
+}
+
/// propagateEquality - The given values are known to be equal in every block
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
+bool GVN::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
+ // For speed, compute a conservative fast approximation to
+ // DT->dominates(Root, Root.getEnd());
+ bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
while (!Worklist.empty()) {
std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
LVN = RVN;
}
}
- assert((!isa<Instruction>(RHS) ||
- DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
- "Instruction doesn't dominate scope!");
// If value numbering later sees that an instruction in the scope is equal
// to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
@@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// if RHS is an instruction (if an instruction in the scope is morphed into
// LHS then it will be turned into RHS by the next GVN iteration anyway, so
// using the leader table is about compiling faster, not optimizing better).
- if (!isa<Instruction>(RHS))
- addToLeaderTable(LVN, RHS, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd && !isa<Instruction>(RHS))
+ addToLeaderTable(LVN, RHS, Root.getEnd());
// Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
// LHS always has at least one use that is not dominated by Root, this will
@@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
// If the number we were assigned was brand new then there is no point in
// looking for an instruction realizing it: there cannot be one!
if (Num < NextNum) {
- Value *NotCmp = findLeader(Root, Num);
+ Value *NotCmp = findLeader(Root.getEnd(), Num);
if (NotCmp && isa<Instruction>(NotCmp)) {
unsigned NumReplacements =
replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
@@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
}
// Ensure that any instruction in scope that gets the "A < B" value number
// is replaced with false.
- addToLeaderTable(Num, NotVal, Root);
+ // The leader table only tracks basic blocks, not edges. Only add to if we
+ // have the simple case where the edge dominates the end.
+ if (RootDominatesEnd)
+ addToLeaderTable(Num, NotVal, Root.getEnd());
continue;
}
@@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
return Changed;
}
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return
-/// true if every path from the entry block to 'Dst' passes via this edge. In
-/// particular 'Dst' must not be reachable via another edge from 'Src'.
-static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
- DominatorTree *DT) {
- // While in theory it is interesting to consider the case in which Dst has
- // more than one predecessor, because Dst might be part of a loop which is
- // only reachable from Src, in practice it is pointless since at the time
- // GVN runs all such loops have preheaders, which means that Dst will have
- // been changed to have only one predecessor, namely Src.
- BasicBlock *Pred = Dst->getSinglePredecessor();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
- return Pred != 0;
-}
-
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) {
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
+ // Avoid multiple edges early.
+ if (TrueSucc == FalseSucc)
+ return false;
+
BasicBlock *Parent = BI->getParent();
bool Changed = false;
- if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getTrue(TrueSucc->getContext()),
- TrueSucc);
+ Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
+ BasicBlockEdge TrueE(Parent, TrueSucc);
+ Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
- if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
- Changed |= propagateEquality(BranchCond,
- ConstantInt::getFalse(FalseSucc->getContext()),
- FalseSucc);
+ Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
+ BasicBlockEdge FalseE(Parent, FalseSucc);
+ Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
return Changed;
}
@@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
BasicBlock *Dst = i.getCaseSuccessor();
- if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
+ BasicBlockEdge E(Parent, Dst);
+ if (E.isSingleEdge())
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
}
return Changed;
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 582948e..0192e92 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -175,7 +175,9 @@ namespace {
bool canSinkOrHoistInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- void PromoteAliasSet(AliasSet &AS);
+ void PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts);
};
}
@@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ SmallVector<Instruction *, 8> InsertPts;
+
// Loop over all of the alias sets in the tracker object.
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
I != E; ++I)
- PromoteAliasSet(*I);
+ PromoteAliasSet(*I, ExitBlocks, InsertPts);
}
// Clear out loops state information for the next iteration
@@ -631,6 +636,7 @@ namespace {
Value *SomePtr; // Designated pointer to store to.
SmallPtrSet<Value*, 4> &PointerMustAliases;
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
+ SmallVectorImpl<Instruction*> &LoopInsertPts;
AliasSetTracker &AST;
DebugLoc DL;
int Alignment;
@@ -638,11 +644,12 @@ namespace {
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
- SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl, int alignment)
+ SmallVectorImpl<BasicBlock*> &LEB,
+ SmallVectorImpl<Instruction*> &LIP,
+ AliasSetTracker &ast, DebugLoc dl, int alignment)
: LoadAndStorePromoter(Insts, S), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
- Alignment(alignment) {}
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP),
+ AST(ast), DL(dl), Alignment(alignment) {}
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
@@ -662,7 +669,7 @@ namespace {
for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = LoopExitBlocks[i];
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
- Instruction *InsertPos = ExitBlock->getFirstInsertionPt();
+ Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
@@ -684,7 +691,9 @@ namespace {
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
-void LICM::PromoteAliasSet(AliasSet &AS) {
+void LICM::PromoteAliasSet(AliasSet &AS,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks,
+ SmallVectorImpl<Instruction*> &InsertPts) {
// We can promote this alias set if it has a store, if it is a "Must" alias
// set, if the pointer is loop invariant, and if we are not eliminating any
// volatile loads or stores.
@@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// location is better than none.
DebugLoc DL = LoopUses[0]->getDebugLoc();
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getUniqueExitBlocks(ExitBlocks);
+ // Figure out the loop exits and their insertion points, if this is the
+ // first promotion.
+ if (ExitBlocks.empty()) {
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ InsertPts.resize(ExitBlocks.size());
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+ InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt();
+ }
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL, Alignment);
+ InsertPts, *CurAST, DL, Alignment);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b14a713..0ae7a51 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val());
+ Value *V = DeadInsts.pop_back_val();
+ Instruction *I = dyn_cast_or_null<Instruction>(V);
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index ffcf97c..09687d8 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
+#if 0 // TODO: Re-enable once PR13021 is fixed.
// The leaf already has one use from inside the expression. As we want
// exactly one such use, drop this new use of the leaf.
assert(!Op->hasOneUse() && "Only one use, but we got here twice!");
@@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I,
Leaves.erase(It);
continue;
}
+#endif
// If we still have uses that are not accounted for by the expression
// then it is not safe to modify the value.
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index ec835b1..8090fdf 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
-STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
@@ -183,9 +182,6 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
bool ShouldAttemptScalarRepl(AllocaInst *AI);
-
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(
- AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
// SROA_DT - SROA that uses DominatorTree.
@@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->hasAllConstantIndices())
- NonConstantIdx = Indices.pop_back_val();
+ Value* GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ assert(!NonConstantIdx &&
+ "Dynamic GEP reading from dynamic GEP unsupported");
+ GEPNonConstantIdx = Indices.pop_back_val();
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
- ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
+ ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx);
GEP->eraseFromParent();
continue;
}
@@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
return false;
}
-/// getPointeeAlignment - Compute the minimum alignment of the value pointed
-/// to by the given pointer.
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- (CE->getOpcode() == Instruction::GetElementPtr &&
- cast<GEPOperator>(CE)->hasAllZeroIndices()))
- return getPointeeAlignment(CE->getOperand(0), TD);
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- if (!GV->isDeclaration())
- return TD.getPreferredAlignment(GV);
-
- if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
- return TD.getABITypeAlignment(PT->getElementType());
-
- return 0;
-}
-
-
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing them
// if they are only used by getelementptr instructions.
@@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) {
if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
continue;
- // Check to see if this allocation is only modified by a memcpy/memmove from
- // a constant global whose alignment is equal to or exceeds that of the
- // allocation. If this is the case, we can change all users to use
- // the constant global instead. This is commonly produced by the CFE by
- // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
- // is only subsequently read.
- SmallVector<Instruction *, 4> ToDelete;
- if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
- if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
- DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
- for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
- ToDelete[i]->eraseFromParent();
- Constant *TheSrc = cast<Constant>(Copy->getSource());
- AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- Copy->eraseFromParent(); // Don't mutate the global.
- AI->eraseFromParent();
- ++NumGlobals;
- Changed = true;
- continue;
- }
- }
-
// Check to see if we can perform the core SROA transformation. We cannot
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
@@ -2651,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return true;
}
-
-
-
-/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
-/// some part of a constant global variable. This intentionally only accepts
-/// constant expressions because we don't can't rewrite arbitrary instructions.
-static bool PointsToConstantGlobal(Value *V) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr)
- return PointsToConstantGlobal(CE->getOperand(0));
- return false;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
-/// pointer to an alloca. Ignore any reads of the pointer, return false if we
-/// see any stores or other unknown uses. If we see pointer arithmetic, keep
-/// track of whether it moves the pointer (with isOffset) but otherwise traverse
-/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
-/// the alloca, and if the source pointer is a pointer to a constant global, we
-/// can optimize this.
-static bool
-isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset,
- SmallVector<Instruction *, 4> &LifetimeMarkers) {
- // We track lifetime intrinsics as we encounter them. If we decide to go
- // ahead and replace the value with the global, this lets the caller quickly
- // eliminate the markers.
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
- User *U = cast<Instruction>(*UI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- // Ignore non-volatile loads, they are always ok.
- if (!LI->isSimple()) return false;
- continue;
- }
-
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- // If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
- LifetimeMarkers))
- return false;
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // If the GEP has all zero indices, it doesn't offset the pointer. If it
- // doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices(),
- LifetimeMarkers))
- return false;
- continue;
- }
-
- if (CallSite CS = U) {
- // If this is the function being called then we treat it like a load and
- // ignore it.
- if (CS.isCallee(UI))
- continue;
-
- // If this is a readonly/readnone call site, then we know it is just a
- // load (but one that potentially returns the value itself), so we can
- // ignore it if we know that the value isn't captured.
- unsigned ArgNo = CS.getArgumentNo(UI);
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
- continue;
-
- // If this is being passed as a byval argument, the caller is making a
- // copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
- continue;
- }
-
- // Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- LifetimeMarkers.push_back(II);
- continue;
- }
- }
-
- // If this is isn't our memcpy/memmove, reject it as something we can't
- // handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
- if (MI == 0)
- return false;
-
- // If the transfer is using the alloca as a source of the transfer, then
- // ignore it since it is a load (unless the transfer is volatile).
- if (UI.getOperandNo() == 1) {
- if (MI->isVolatile()) return false;
- continue;
- }
-
- // If we already have seen a copy, reject the second one.
- if (TheCopy) return false;
-
- // If the pointer has been offset from the start of the alloca, we can't
- // safely handle this.
- if (isOffset) return false;
-
- // If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 0) return false;
-
- // If the source of the memcpy/move is not a constant global, reject it.
- if (!PointsToConstantGlobal(MI->getSource()))
- return false;
-
- // Otherwise, the transform is safe. Remember the copy instruction.
- TheCopy = MI;
- }
- return true;
-}
-
-/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
-/// modified by a copy from a constant global. If we can prove this, we can
-/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *
-SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
- SmallVector<Instruction*, 4> &ToDelete) {
- MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
- return TheCopy;
- return 0;
-}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index a1a8a41..3904419 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitStrLenMemCpy(Src, Dst, Len, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, Len, B);
}
- void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
+ Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
// We need to find the end of the destination string. That's where the
// memory is to be moved to. We just generate a call to strlen.
- Value *DstLen = EmitStrLen(Dst, B, TD);
+ Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+ if (!DstLen)
+ return 0;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization {
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(CpyDst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+ return Dst;
}
};
@@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt {
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
- EmitStrLenMemCpy(Src, Dst, SrcLen, B);
- return Dst;
+ return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
}
};
@@ -254,7 +255,7 @@ struct StrChrOpt : public LibCallOptimization {
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- B, TD);
+ B, TD, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
@@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, TD);
+ return EmitStrChr(SrcStr, '\0', B, TD, TLI);
return 0;
}
@@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization {
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(TD->getIntPtrType(*Context),
- std::min(Len1, Len2)), B, TD);
+ std::min(Len1, Len2)), B, TD, TLI);
}
return 0;
@@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
- return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1);
@@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall ||
+ !EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getArgOperand(2), B, TD, TLI))
B.CreateMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
return Dst;
@@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization {
if (!TD) return 0;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // stpcpy(x,x) -> x+strlen(x)
- return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD));
+ if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
+ Value *StrLen = EmitStrLen(Src, B, TD, TLI);
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ }
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- if (OptChkCall)
- EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD);
- else
+ if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B,
+ TD, TLI))
B.CreateMemCpy(Dst, Src, LenV, 1);
return DstEnd;
}
@@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization {
// strpbrk(s, "a") -> strchr(s, 'a')
if (TD && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD);
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI);
return 0;
}
@@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization {
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
- return EmitStrLen(CI->getArgOperand(0), B, TD);
+ return EmitStrLen(CI->getArgOperand(0), B, TD, TLI);
return 0;
}
@@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
- Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI);
+ if (!StrLen)
+ return 0;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
- StrLen, B, TD);
+ StrLen, B, TD, TLI);
+ if (!StrNCmp)
+ return 0;
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
@@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization {
}
// fold strstr(x, "y") -> strchr(x, 'y').
- if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
- ToFindStr[0], B, TD), CI->getType());
+ if (HasStr2 && ToFindStr.size() == 1) {
+ Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI);
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ }
return 0;
}
};
@@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- EmitPutS(GV, B, TD);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), FormatStr.size()+1);
+ Value *NewCI = EmitPutS(GV, B, TD, TLI);
+ return (CI->use_empty() || !NewCI) ?
+ NewCI :
+ ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI);
- if (CI->use_empty()) return CI;
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- EmitPutS(CI->getArgOperand(1), B, TD);
- return CI;
+ return EmitPutS(CI->getArgOperand(1), B, TD, TLI);
}
return 0;
}
@@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization {
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+ if (!Len)
+ return 0;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization {
// This optimisation is only valid, if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- EmitFPutC(Char, CI->getArgOperand(3), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
return 0;
@@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization {
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getArgOperand(0),
- ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD, TLI);
- return CI; // Known to have no uses (see above).
+ // Known to have no uses (see above).
+ return EmitFWrite(CI->getArgOperand(0),
+ ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+ CI->getArgOperand(1), B, TD, TLI);
}
};
@@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return ConstantInt::get(CI->getType(), FormatStr.size());
+ Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
- return ConstantInt::get(CI->getType(), 1);
+ Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+ TD, TLI);
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
- return CI;
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
return 0;
}
@@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, TD);
- if (CI->use_empty()) return CI;
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+ if (CI->use_empty() || !Res) return Res;
return B.CreateIntCast(Res, CI->getType(), true);
}
@@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["llvm.exp2.f64"] = &Exp2;
Optimizations["llvm.exp2.f32"] = &Exp2;
+ if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf))
+ Optimizations["fabs"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf))
Optimizations["floor"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf))
@@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["rint"] = &UnaryDoubleFP;
if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf))
Optimizations["nearbyint"] = &UnaryDoubleFP;
+ if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf))
+ Optimizations["trunc"] = &UnaryDoubleFP;
// Integer Optimizations
Optimizations["ffs"] = &FFS;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 5576432..2679b93 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
- i != e; ++i)
- if (PHINode *PN = dyn_cast<PHINode>(*i))
- if (PN->getParent() == BB)
- *i = PN->getIncomingValueForBlock(Pred);
+ i != e; ++i) {
+ Value *V = *i;
+ Instruction *NewBC = 0;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ // Return value might be bitcasted. Clone and insert it before the
+ // return instruction.
+ V = BCI->getOperand(0);
+ NewBC = BCI->clone();
+ Pred->getInstList().insert(NewRet, NewBC);
+ *i = NewBC;
+ }
+ if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (PN->getParent() == BB) {
+ if (NewBC)
+ NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred));
+ else
+ *i = PN->getIncomingValueForBlock(Pred);
+ }
+ }
+ }
// Update any PHI nodes in the returning block to realize that we no
// longer branch to them.
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 27f7724..e13fd71 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strlen))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) {
return CI;
}
+/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the
+/// specified pointer. Ptr is required to be some pointer type, MaxLen must
+/// be of size_t type, and the return value has 'intptr_t' type.
+Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strnlen))
+ return 0;
+
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[2];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
+ TD->getIntPtrType(Context),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ NULL);
+ CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
+ if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitStrChr - Emit a call to the strchr function to the builder, for the
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI =
AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::strncmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const TargetData *TD, StringRef Name) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ StringRef Name) {
+ if (!TLI->has(LibFunc::strcpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const TargetData *TD, StringRef Name) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI, StringRef Name) {
+ if (!TLI->has(LibFunc::strncpy))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcpy_chk))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
@@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memchr))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI;
AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);
@@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
/// EmitMemCmp - Emit a call to the memcmp function.
Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const TargetData *TD) {
+ Value *Len, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::memcmp))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::putchar))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
B.getInt32Ty(), NULL);
@@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) {
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::puts))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) {
CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
-
+ return CI;
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
-void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputc))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[2];
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
@@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
-void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD, const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fputs))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::fwrite))
+ return 0;
+
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
@@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
+ return CI;
}
SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
-bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// We really need TargetData for later.
if (!TD) return false;
@@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// string lengths for varying.
if (isFoldable(2, 1, true)) {
Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- Name.substr(2, 6));
+ TLI, Name.substr(2, 6));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
@@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, TD, Name.substr(2, 7));
+ CI->getArgOperand(2), B, TD, TLI,
+ Name.substr(2, 7));
+ if (!Ret)
+ return false;
replaceCall(Ret);
return true;
}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b3f5289..72d4199 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
: AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
- delete &getAvailableVals(AV);
+ delete static_cast<AvailableValsTy*>(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) {
else
V = GetValueInMiddleOfBlock(User->getParent());
+ // Notify that users of the existing value that it is being replaced.
+ Value *OldVal = U.get();
+ if (OldVal != V && OldVal->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(OldVal, V);
+
U.set(V);
}