aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp6
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp96
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp7
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp6
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp25
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp260
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp57
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp10
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp83
-rw-r--r--lib/Transforms/Scalar/SROA.cpp431
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp4
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp9
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp26
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp406
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp4
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp231
20 files changed, 1055 insertions, 624 deletions
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 8a6bfc6..fa3d72d 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -60,7 +60,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -80,7 +80,7 @@ namespace {
continue;
}
- bool Local = I->hasLocalLinkage();
+ bool Local = I->isDiscardableIfUnused();
if (Local)
I->setVisibility(GlobalValue::HiddenVisibility);
@@ -97,7 +97,7 @@ namespace {
Module::alias_iterator CurI = I;
++I;
- if (CurI->hasLocalLinkage()) {
+ if (CurI->isDiscardableIfUnused()) {
CurI->setVisibility(GlobalValue::HiddenVisibility);
CurI->setLinkage(GlobalValue::ExternalLinkage);
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 47b2b51..027a9f2 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -391,9 +391,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
LLVMPassManagerRef PM,
- bool Internalize,
- bool RunInliner) {
+ LLVMBool Internalize,
+ LLVMBool RunInliner) {
PassManagerBuilder *Builder = unwrap(PMB);
PassManagerBase *LPM = unwrap(PM);
- Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
+ Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c6d60d6..3c5781c 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -150,7 +150,9 @@ namespace {
typedef SmallVector<const FAddend*, 4> AddendVect;
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
-
+
+ Value *performFactorization(Instruction *I);
+
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
@@ -159,6 +161,7 @@ namespace {
Value *createFSub(Value *Opnd0, Value *Opnd1);
Value *createFAdd(Value *Opnd0, Value *Opnd1);
Value *createFMul(Value *Opnd0, Value *Opnd1);
+ Value *createFDiv(Value *Opnd0, Value *Opnd1);
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
@@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep
return BreakNum;
}
+// Try to perform following optimization on the input instruction I. Return the
+// simplified expression if was successful; otherwise, return 0.
+//
+// Instruction "I" is Simplified into
+// -------------------------------------------------------
+// (x * y) +/- (x * z) x * (y +/- z)
+// (y / x) +/- (z / x) (y +/- z) / x
+//
+Value *FAddCombine::performFactorization(Instruction *I) {
+ assert((I->getOpcode() == Instruction::FAdd ||
+ I->getOpcode() == Instruction::FSub) && "Expect add/sub");
+
+ Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
+
+ if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
+ return 0;
+
+ bool isMpy = false;
+ if (I0->getOpcode() == Instruction::FMul)
+ isMpy = true;
+ else if (I0->getOpcode() != Instruction::FDiv)
+ return 0;
+
+ Value *Opnd0_0 = I0->getOperand(0);
+ Value *Opnd0_1 = I0->getOperand(1);
+ Value *Opnd1_0 = I1->getOperand(0);
+ Value *Opnd1_1 = I1->getOperand(1);
+
+ // Input Instr I Factor AddSub0 AddSub1
+ // ----------------------------------------------
+ // (x*y) +/- (x*z) x y z
+ // (y/x) +/- (z/x) x y z
+ //
+ Value *Factor = 0;
+ Value *AddSub0 = 0, *AddSub1 = 0;
+
+ if (isMpy) {
+ if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
+ Factor = Opnd0_0;
+ else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1)
+ Factor = Opnd0_1;
+
+ if (Factor) {
+ AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0;
+ AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0;
+ }
+ } else if (Opnd0_1 == Opnd1_1) {
+ Factor = Opnd0_1;
+ AddSub0 = Opnd0_0;
+ AddSub1 = Opnd1_0;
+ }
+
+ if (!Factor)
+ return 0;
+
+ // Create expression "NewAddSub = AddSub0 +/- AddsSub1"
+ Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ?
+ createFAdd(AddSub0, AddSub1) :
+ createFSub(AddSub0, AddSub1);
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
+ const APFloat &F = CFP->getValueAPF();
+ if (!F.isNormal() || F.isDenormal())
+ return 0;
+ }
+
+ if (isMpy)
+ return createFMul(Factor, NewAddSub);
+
+ return createFDiv(NewAddSub, Factor);
+}
+
Value *FAddCombine::simplify(Instruction *I) {
assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode");
@@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- return 0;
+ // step 6: Try factorization as the last resort,
+ return performFactorization(I);
}
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
@@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd
Value *FAddCombine::createFSub
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFSub(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
@@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) {
Value *FAddCombine::createFAdd
(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
Value *V = Builder->CreateFMul(Opnd0, Opnd1);
- createInstPostProc(cast<Instruction>(V));
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
+ return V;
+}
+
+Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
+ Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ createInstPostProc(I);
return V;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4332467..990cbc3 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -22,8 +22,8 @@ using namespace PatternMatch;
/// AddOne - Add one to a ConstantInt.
-static Constant *AddOne(Constant *C) {
- return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
+static Constant *AddOne(ConstantInt *C) {
+ return ConstantInt::get(C->getContext(), C->getValue() + 1);
}
/// SubOne - Subtract one from a ConstantInt.
static Constant *SubOne(ConstantInt *C) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a960ab2..d162223 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ // If the allocation has multiple uses, only promote it if we're not
+ // shrinking the amount of memory being allocated.
+ uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy);
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bad46b4..32fdb9b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
// This enables to get rid of the shift in favor of a trunc which can be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ if (LHSI->hasOneUse() &&
+ Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
Constant *NCI = ConstantExpr::getTrunc(
ConstantExpr::getAShr(RHS,
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8e4267f..173f2bf 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2)
+ // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
Instruction *FAddSub = dyn_cast<Instruction>(Op0);
if (FAddSub &&
(FAddSub->getOpcode() == Instruction::FAdd ||
@@ -420,8 +420,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
- Value *M0 = ConstantExpr::getFMul(C1, C);
- Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ?
+ Value *M1 = ConstantExpr::getFMul(C1, C);
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 6877475..92b42ee 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init_v1";
+static const char *kAsanInitName = "__asan_init_v2";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = false,
+ AddressSanitizer(bool CheckInitOrder = true,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
StringRef BlacklistFile = StringRef(),
@@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = false,
+ AddressSanitizerModule(bool CheckInitOrder = true,
StringRef BlacklistFile = StringRef(),
bool ZeroBaseShadow = false)
: ModulePass(ID),
@@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
// Create a constant for Str so that we can pass it to the run-time lib.
static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
- return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst,
kAsanGenPrefix);
+ GV->setUnnamedAddr(true); // Ok to merge these.
+ GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
+ return GV;
}
static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
@@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// size_t size;
// size_t size_with_redzone;
// const char *name;
+ // const char *module_name;
// size_t has_dynamic_init;
// We initialize an array of such structures and pass it to a run-time call.
StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
IntptrTy, IntptrTy,
- IntptrTy, NULL);
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n), DynamicInit;
@@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// this TU. Used in initialization order checking.
Value *FirstDynamic = 0, *LastDynamic = 0;
+ GlobalVariable *ModuleName = createPrivateGlobalForString(
+ M, M.getModuleIdentifier());
+
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- SmallString<2048> DescriptionOfGlobal = G->getName();
- DescriptionOfGlobal += " (";
- DescriptionOfGlobal += M.getModuleIdentifier();
- DescriptionOfGlobal += ")";
- GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+ GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
+ ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
NULL);
@@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index eb0dc1e..e8d4ac8 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/InstIterator.h"
@@ -39,31 +40,57 @@
#include <utility>
using namespace llvm;
+static cl::opt<std::string>
+DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
+ cl::ValueRequired);
+
+GCOVOptions GCOVOptions::getDefault() {
+ GCOVOptions Options;
+ Options.EmitNotes = true;
+ Options.EmitData = true;
+ Options.UseCfgChecksum = false;
+ Options.NoRedZone = false;
+ Options.FunctionNamesInData = true;
+
+ if (DefaultGCOVVersion.size() != 4) {
+ llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ DefaultGCOVVersion);
+ }
+ memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
+ return Options;
+}
+
namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
- UseExtraChecksum(false), NoRedZone(false) {
+ GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format,
- bool useExtraChecksum, bool NoRedZone_)
- : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format), UseExtraChecksum(useExtraChecksum),
- NoRedZone(NoRedZone_) {
- assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
+ GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ assert((Options.EmitNotes || Options.EmitData) &&
+ "GCOVProfiler asked to do nothing?");
+ ReversedVersion[0] = Options.Version[3];
+ ReversedVersion[1] = Options.Version[2];
+ ReversedVersion[2] = Options.Version[1];
+ ReversedVersion[3] = Options.Version[0];
+ ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
virtual const char *getPassName() const {
return "GCOV Profiler";
}
+
private:
bool runOnModule(Module &M);
- // Create the GCNO files for the Module based on DebugInfo.
- void emitGCNO();
+ // Create the .gcno files for the Module based on DebugInfo.
+ void emitProfileNotes();
// Modify the program to track transitions along edges and call into the
// profiling runtime to emit .gcda files when run.
@@ -74,6 +101,7 @@ namespace {
Constant *getIncrementIndirectCounterFunc();
Constant *getEmitFunctionFunc();
Constant *getEmitArcsFunc();
+ Constant *getDeleteFlushFunctionListFunc();
Constant *getEndFileFunc();
// Create or retrieve an i32 state value that is used to represent the
@@ -84,22 +112,22 @@ namespace {
// block number.
GlobalVariable *buildEdgeLookupTable(Function *F,
GlobalVariable *Counter,
- const UniqueVector<BasicBlock *> &Preds,
- const UniqueVector<BasicBlock *> &Succs);
+ const UniqueVector<BasicBlock *>&Preds,
+ const UniqueVector<BasicBlock*>&Succs);
// Add the function to write out all our counters to the global destructor
// list.
- void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
+ Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*,
+ MDNode*> >);
+ Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
void insertIndirectCounterIncrement();
- void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
std::string mangleName(DICompileUnit CU, const char *NewStem);
- bool EmitNotes;
- bool EmitData;
- bool Use402Format;
- bool UseExtraChecksum;
- bool NoRedZone;
+ GCOVOptions Options;
+
+ // Reversed, NUL-terminated copy of Options.Version.
+ char ReversedVersion[5];
Module *M;
LLVMContext *Ctx;
@@ -110,12 +138,8 @@ char GCOVProfiler::ID = 0;
INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
-ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format,
- bool UseExtraChecksum,
- bool NoRedZone) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum,
- NoRedZone);
+ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {
+ return new GCOVProfiler(Options);
}
namespace {
@@ -253,8 +277,8 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os,
- bool Use402Format, bool UseExtraChecksum) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
+ bool UseCfgChecksum) {
this->os = os;
Function *F = SP.getFunction();
@@ -268,13 +292,12 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
++BlockLen;
write(BlockLen);
- uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
write(0); // lineno checksum
- if (UseExtraChecksum)
+ if (UseCfgChecksum)
write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
@@ -358,12 +381,12 @@ bool GCOVProfiler::runOnModule(Module &M) {
this->M = &M;
Ctx = &M.getContext();
- if (EmitNotes) emitGCNO();
- if (EmitData) return emitProfileArcs();
+ if (Options.EmitNotes) emitProfileNotes();
+ if (Options.EmitData) return emitProfileArcs();
return false;
}
-void GCOVProfiler::emitGCNO() {
+void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -376,10 +399,9 @@ void GCOVProfiler::emitGCNO() {
std::string ErrorInfo;
raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out.write("oncg*404MVLL", 12);
- else
- out.write("oncg*204MVLL", 12);
+ out.write("oncg", 4);
+ out.write(ReversedVersion, 4);
+ out.write("MVLL", 4);
DIArray SPs = CU.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
@@ -388,7 +410,7 @@ void GCOVProfiler::emitGCNO() {
Function *F = SP.getFunction();
if (!F) continue;
- GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+ GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
GCOVBlock &Block = Func.getBlock(BB);
@@ -469,21 +491,18 @@ bool GCOVProfiler::emitProfileArcs() {
Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- Value *Sel = Builder.CreateSelect(
- BI->getCondition(),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge),
- ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1));
+ Value *Sel = Builder.CreateSelect(BI->getCondition(),
+ Builder.getInt64(Edge),
+ Builder.getInt64(Edge + 1));
SmallVector<Value *, 2> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx)));
+ Idx.push_back(Builder.getInt64(0));
Idx.push_back(Sel);
Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx);
Value *Count = Builder.CreateLoad(Counter);
- Count = Builder.CreateAdd(Count,
- ConstantInt::get(Type::getInt64Ty(*Ctx),1));
+ Count = Builder.CreateAdd(Count, Builder.getInt64(1));
Builder.CreateStore(Count, Counter);
} else {
ComplexEdgePreds.insert(BB);
@@ -500,10 +519,9 @@ bool GCOVProfiler::emitProfileArcs() {
ComplexEdgePreds, ComplexEdgeSuccs);
GlobalVariable *EdgeState = getEdgeStateValue();
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) {
IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator());
- Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState);
+ Builder.CreateStore(Builder.getInt32(i), EdgeState);
}
for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) {
// call runtime to perform increment
@@ -522,8 +540,42 @@ bool GCOVProfiler::emitProfileArcs() {
}
}
- insertCounterWriteout(CountersBySP);
- insertFlush(CountersBySP);
+ Function *WriteoutF = insertCounterWriteout(CountersBySP);
+ Function *FlushF = insertFlush(CountersBySP);
+
+ // Create a small bit of code that registers the "__llvm_gcov_writeout" to
+ // be executed at exit and the "__llvm_gcov_flush" function to be executed
+ // when "__gcov_flush" is called.
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
+ "__llvm_gcov_init", M);
+ F->setUnnamedAddr(true);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ F->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ F->addFnAttr(Attribute::NoRedZone);
+
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
+ IRBuilder<> Builder(BB);
+
+ FTy = FunctionType::get(Builder.getInt32Ty(),
+ PointerType::get(FTy, 0), false);
+ Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
+ Builder.CreateCall(AtExitFn, WriteoutF);
+
+ // Register the local flush function.
+ FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ FTy = FunctionType::get(Builder.getVoidTy(),
+ PointerType::get(FTy, 0), false);
+ Constant *RegFlush =
+ M->getOrInsertFunction("llvm_register_flush_function", FTy);
+ Builder.CreateCall(RegFlush, FlushF);
+
+ // Make sure that all the flush function list is deleted.
+ Builder.CreateCall(AtExitFn, getDeleteFlushFunctionListFunc());
+ Builder.CreateRetVoid();
+
+ appendToGlobalCtors(*M, F, 0);
}
if (InsertIndCounterIncrCode)
@@ -560,8 +612,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) {
for (int i = 0; i != Successors; ++i) {
BasicBlock *Succ = TI->getSuccessor(i);
- IRBuilder<> builder(Succ);
- Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0,
+ IRBuilder<> Builder(Succ);
+ Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Edge + i);
EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) +
(Preds.idFor(BB)-1)] = cast<Constant>(Counter);
@@ -581,8 +633,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
}
Constant *GCOVProfiler::getStartFileFunc() {
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Type::getInt8PtrTy(*Ctx), false);
+ Type *Args[] = {
+ Type::getInt8PtrTy(*Ctx), // const char *orig_filename
+ Type::getInt8PtrTy(*Ctx), // const char version[4]
+ };
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
@@ -598,9 +653,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- Type *Args[2] = {
+ Type *Args[3] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
+ Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum
};
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_function", FTy);
@@ -611,11 +667,15 @@ Constant *GCOVProfiler::getEmitArcsFunc() {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Args, false);
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false);
return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy);
}
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() {
+ FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy);
+}
+
Constant *GCOVProfiler::getEndFileFunc() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_end_file", FTy);
@@ -634,7 +694,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
return GV;
}
-void GCOVProfiler::insertCounterWriteout(
+Function *GCOVProfiler::insertCounterWriteout(
ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
Function *WriteoutF = M->getFunction("__llvm_gcov_writeout");
@@ -643,7 +703,7 @@ void GCOVProfiler::insertCounterWriteout(
"__llvm_gcov_writeout", M);
WriteoutF->setUnnamedAddr(true);
WriteoutF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
WriteoutF->addFnAttr(Attribute::NoRedZone);
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
@@ -659,50 +719,31 @@ void GCOVProfiler::insertCounterWriteout(
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CU(CU_Nodes->getOperand(i));
std::string FilenameGcda = mangleName(CU, "gcda");
- Builder.CreateCall(StartFile,
- Builder.CreateGlobalStringPtr(FilenameGcda));
- for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
- I = CountersBySP.begin(), E = CountersBySP.end();
- I != E; ++I) {
- DISubprogram SP(I->second);
- intptr_t ident = reinterpret_cast<intptr_t>(I->second);
- Builder.CreateCall2(EmitFunction,
- ConstantInt::get(Type::getInt32Ty(*Ctx), ident),
- Builder.CreateGlobalStringPtr(SP.getName()));
-
- GlobalVariable *GV = I->first;
+ Builder.CreateCall2(StartFile,
+ Builder.CreateGlobalStringPtr(FilenameGcda),
+ Builder.CreateGlobalStringPtr(ReversedVersion));
+ for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) {
+ DISubprogram SP(CountersBySP[j].second);
+ Builder.CreateCall3(EmitFunction,
+ Builder.getInt32(j),
+ Options.FunctionNamesInData ?
+ Builder.CreateGlobalStringPtr(SP.getName()) :
+ Constant::getNullValue(Builder.getInt8PtrTy()),
+ Builder.getInt8(Options.UseCfgChecksum));
+
+ GlobalVariable *GV = CountersBySP[j].first;
unsigned Arcs =
cast<ArrayType>(GV->getType()->getElementType())->getNumElements();
Builder.CreateCall2(EmitArcs,
- ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs),
+ Builder.getInt32(Arcs),
Builder.CreateConstGEP2_64(GV, 0, 0));
}
Builder.CreateCall(EndFile);
}
}
- Builder.CreateRetVoid();
- // Create a small bit of code that registers the "__llvm_gcov_writeout"
- // function to be executed at exit.
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *F = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__llvm_gcov_init", M);
- F->setUnnamedAddr(true);
- F->setLinkage(GlobalValue::InternalLinkage);
- F->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
- F->addFnAttr(Attribute::NoRedZone);
-
- BB = BasicBlock::Create(*Ctx, "entry", F);
- Builder.SetInsertPoint(BB);
-
- FTy = FunctionType::get(Type::getInt32Ty(*Ctx),
- PointerType::get(FTy, 0), false);
- Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy);
- Builder.CreateCall(AtExitFn, WriteoutF);
Builder.CreateRetVoid();
-
- appendToGlobalCtors(*M, F, 0);
+ return WriteoutF;
}
void GCOVProfiler::insertIndirectCounterIncrement() {
@@ -711,13 +752,9 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Fn->setUnnamedAddr(true);
Fn->setLinkage(GlobalValue::InternalLinkage);
Fn->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
Fn->addFnAttr(Attribute::NoRedZone);
- Type *Int32Ty = Type::getInt32Ty(*Ctx);
- Type *Int64Ty = Type::getInt64Ty(*Ctx);
- Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff);
-
// Create basic blocks for function.
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn);
IRBuilder<> Builder(BB);
@@ -731,26 +768,27 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Argument *Arg = Fn->arg_begin();
Arg->setName("predecessor");
Value *Pred = Builder.CreateLoad(Arg, "pred");
- Value *Cond = Builder.CreateICmpEQ(Pred, NegOne);
+ Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff));
BranchInst::Create(Exit, PredNotNegOne, Cond, BB);
Builder.SetInsertPoint(PredNotNegOne);
// uint64_t *counter = counters[pred];
// if (!counter) return;
- Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty);
+ Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());
Arg = llvm::next(Fn->arg_begin());
Arg->setName("counters");
Value *GEP = Builder.CreateGEP(Arg, ZExtPred);
Value *Counter = Builder.CreateLoad(GEP, "counter");
Cond = Builder.CreateICmpEQ(Counter,
- Constant::getNullValue(Int64Ty->getPointerTo()));
+ Constant::getNullValue(
+ Builder.getInt64Ty()->getPointerTo()));
Builder.CreateCondBr(Cond, Exit, CounterEnd);
// ++*counter;
Builder.SetInsertPoint(CounterEnd);
Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter),
- ConstantInt::get(Int64Ty, 1));
+ Builder.getInt64(1));
Builder.CreateStore(Add, Counter);
Builder.CreateBr(Exit);
@@ -759,18 +797,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
Builder.CreateRetVoid();
}
-void GCOVProfiler::
+Function *GCOVProfiler::
insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false);
- Function *FlushF = M->getFunction("__gcov_flush");
+ Function *FlushF = M->getFunction("__llvm_gcov_flush");
if (!FlushF)
FlushF = Function::Create(FTy, GlobalValue::InternalLinkage,
- "__gcov_flush", M);
+ "__llvm_gcov_flush", M);
else
FlushF->setLinkage(GlobalValue::InternalLinkage);
FlushF->setUnnamedAddr(true);
FlushF->addFnAttr(Attribute::NoInline);
- if (NoRedZone)
+ if (Options.NoRedZone)
FlushF->addFnAttr(Attribute::NoRedZone);
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
@@ -795,8 +833,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
if (RetTy == Type::getVoidTy(*Ctx))
Builder.CreateRetVoid();
else if (RetTy->isIntegerTy())
- // Used if __gcov_flush was implicitly declared.
+ // Used if __llvm_gcov_flush was implicitly declared.
Builder.CreateRet(ConstantInt::get(RetTy, 0));
else
- report_fatal_error("invalid return type for __gcov_flush");
+ report_fatal_error("invalid return type for __llvm_gcov_flush");
+
+ return FlushF;
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 80705af..fce6513 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -418,13 +418,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
ValueMap<Value*, Value*> ShadowMap, OriginMap;
bool InsertChecks;
+ bool LoadShadow;
OwningPtr<VarArgHelper> VAHelper;
- // An unfortunate workaround for asymmetric lowering of va_arg stuff.
- // See a comment in visitCallSite for more details.
- static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
- static const unsigned AMD64FpEndOffset = 176;
-
struct ShadowOriginAndInsertPoint {
Instruction *Shadow;
Instruction *Origin;
@@ -437,11 +433,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SmallVector<Instruction*, 16> StoreList;
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
- : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- InsertChecks = !MS.BL->isIn(F);
+ : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+ LoadShadow = InsertChecks =
+ !MS.BL->isIn(F) &&
+ F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::SanitizeMemory);
+
DEBUG(if (!InsertChecks)
- dbgs() << "MemorySanitizer is not inserting checks into '"
- << F.getName() << "'\n");
+ dbgs() << "MemorySanitizer is not inserting checks into '"
+ << F.getName() << "'\n");
}
void materializeStores() {
@@ -836,15 +836,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(&I);
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
- Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ setShadow(&I,
+ IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
if (ClCheckAccessAddress)
insertCheck(I.getPointerOperand(), &I);
if (MS.TrackOrigins) {
- unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
- setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ if (LoadShadow) {
+ unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ setOrigin(&I,
+ IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
+ } else {
+ setOrigin(&I, getCleanOrigin());
+ }
}
}
@@ -1410,16 +1420,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Addr = I.getArgOperand(0);
Type *ShadowTy = getShadowTy(&I);
- Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
- // We don't know the pointer alignment (could be unaligned SSE load!).
- // Have to assume to worst case.
- setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ if (LoadShadow) {
+ Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+ // We don't know the pointer alignment (could be unaligned SSE load!).
+ // Have to assume to worst case.
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld"));
+ } else {
+ setShadow(&I, getCleanShadow(&I));
+ }
+
if (ClCheckAccessAddress)
insertCheck(Addr, &I);
- if (MS.TrackOrigins)
- setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ if (MS.TrackOrigins) {
+ if (LoadShadow)
+ setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
+ else
+ setOrigin(&I, getCleanOrigin());
+ }
return true;
}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d71dd5d..015fd2e 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
- if (TLI && TLI->isSlowDivBypassed()) {
+ if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
for (Function::iterator I = F.begin(); I != F.end(); I++)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index c04b447..129af8d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
-static void patchReplacementInstruction(Value *Repl, Instruction *I) {
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
BinaryOperator *Op = dyn_cast<BinaryOperator>(I);
@@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) {
}
}
-static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) {
- patchReplacementInstruction(Repl, I);
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
}
@@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
}
// Remove it!
- patchAndReplaceAllUsesWith(AvailableVal, L);
+ patchAndReplaceAllUsesWith(L, AvailableVal);
if (DepLI->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
markInstructionForDeletion(L);
@@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) {
}
// Remove it!
- patchAndReplaceAllUsesWith(repl, I);
+ patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
markInstructionForDeletion(I);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 1601a8d..14e463a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,6 +53,7 @@
#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
@@ -60,14 +61,22 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+static cl::opt<bool>
+EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
+ cl::desc("Enable global merge pass on constants"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -78,6 +87,23 @@ namespace {
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
+ /// \brief Check if the given variable has been identified as must keep
+ /// \pre setMustKeepGlobalVariables must have been called on the Module that
+ /// contains GV
+ bool isMustKeepGlobalVariable(const GlobalVariable *GV) const {
+ return MustKeepGlobalVariables.count(GV);
+ }
+
+ /// Collect every variables marked as "used" or used in a landing pad
+ /// instruction for this Module.
+ void setMustKeepGlobalVariables(Module &M);
+
+ /// Collect every variables marked as "used"
+ void collectUsedGlobalVariables(Module &M);
+
+ /// Keep track of the GlobalVariable that are marked as "used"
+ SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables;
+
public:
static char ID; // Pass identification, replacement for typeid.
explicit GlobalMerge(const TargetLowering *tli = 0)
@@ -169,6 +195,46 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
return true;
}
+void GlobalMerge::collectUsedGlobalVariables(Module &M) {
+ // Extract global variables from llvm.used array
+ const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+ if (!GV || !GV->hasInitializer()) return;
+
+ // Should be an array of 'i8*'.
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0) return;
+
+ for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+ if (const GlobalVariable *G =
+ dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(G);
+}
+
+void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
+ // If we already processed a Module, UsedGlobalVariables may have been
+ // populated. Reset the information for this module.
+ MustKeepGlobalVariables.clear();
+ collectUsedGlobalVariables(M);
+
+ for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
+ ++IFn) {
+ for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
+ IBB != IEndBB; ++IBB) {
+ // Follow the inwoke link to find the landing pad instruction
+ const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
+ if (!II) continue;
+
+ const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
+ // Look for globals in the clauses of the landing pad instruction
+ for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
+ Idx != NumClauses; ++Idx)
+ if (const GlobalVariable *GV =
+ dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
+ ->stripPointerCasts()))
+ MustKeepGlobalVariables.insert(GV);
+ }
+ }
+}
bool GlobalMerge::doInitialization(Module &M) {
DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals,
@@ -176,6 +242,7 @@ bool GlobalMerge::doInitialization(Module &M) {
const DataLayout *TD = TLI->getDataLayout();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
bool Changed = false;
+ setMustKeepGlobalVariables(M);
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
@@ -200,6 +267,12 @@ bool GlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
+ // Ignore all "required" globals:
+ // - the ones used for EH
+ // - the ones marked with "used" attribute
+ if (isMustKeepGlobalVariable(I))
+ continue;
+
if (TD->getTypeAllocSize(Ty) < MaxOffset) {
if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
.isBSSLocal())
@@ -221,11 +294,11 @@ bool GlobalMerge::doInitialization(Module &M) {
if (I->second.size() > 1)
Changed |= doMerge(I->second, M, false, I->first);
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
+ if (EnableGlobalMergeOnConst)
+ for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator
+ I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I)
+ if (I->second.size() > 1)
+ Changed |= doMerge(I->second, M, true, I->first);
return Changed;
}
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e90fe90..0e57e5c 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -69,112 +69,130 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
-public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+ /// \brief Support for ordering ranges.
+ ///
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
- /// \brief A partition of an alloca.
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
-
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
- }
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
- /// \brief A particular use of a partition of the alloca.
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -456,10 +474,10 @@ private:
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +492,17 @@ private:
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
- bool IsVolatile) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse. We also try to handle cases which might run the
- // risk of overflow.
- // FIXME: We should instead consider the pointer to have escaped if this
- // function is being instrumented for addressing bugs or race conditions.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
- << " which extends past the end of the " << AllocSize
- << " byte alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return;
- }
-
+ uint64_t Size, bool IsVolatile) {
// We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+ // and cover the entire alloca. This prevents us from splitting over
+ // eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
@@ -512,7 +514,8 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -522,9 +525,28 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse. We also try to handle cases which might run the
+ // risk of overflow.
+ // FIXME: We should instead consider the pointer to have escaped if this
+ // function is being instrumented for addressing bugs or race conditions.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+ << " which extends past the end of the " << AllocSize
+ << " byte alloca:\n"
+ << " alloca: " << P.AI << "\n"
+ << " use: " << SI << "\n");
+ return;
+ }
+
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
@@ -795,13 +817,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -809,20 +832,6 @@ private:
}
}
- void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
- uint64_t Size = DL.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of of the allocation, it's behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset.isNegative() || Size > AllocSize ||
- Offset.ugt(AllocSize - Size))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
@@ -839,12 +848,23 @@ private:
void visitLoadInst(LoadInst &LI) {
assert(IsOffsetKnown);
- handleLoadOrStore(LI.getType(), LI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
assert(IsOffsetKnown);
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of of the allocation, it's behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
@@ -1089,21 +1109,21 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1140,11 +1160,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1375,11 +1396,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1523,8 +1544,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1571,16 +1592,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilder<> IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1591,7 +1612,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1623,8 +1644,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1911,6 +1932,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1939,6 +1964,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -1977,7 +2006,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1997,24 +2027,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
= (NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (!canConvertValue(TD, PartitionTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2063,7 +2093,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2074,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2089,7 +2120,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2105,16 +2136,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2297,6 +2328,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
@@ -2314,7 +2346,7 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2336,14 +2368,15 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2450,29 +2483,12 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
-
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ IRBuilder<> IRB(&LI);
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
@@ -2492,16 +2508,15 @@ private:
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2588,14 +2603,12 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
getName(".extract"));
@@ -3381,7 +3394,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 00cda8e..1f517d0 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
// Compare operand values and branch
- Value *ZeroV = MainBuilder.getInt32(0);
+ Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F,
// Get bitwidth of div/rem instruction
IntegerType *T = cast<IntegerType>(J->getType());
- int bitwidth = T->getBitWidth();
+ unsigned int bitwidth = T->getBitWidth();
// Continue if bitwidth is not bypassed
DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a309bce..63d7a1d 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -94,9 +94,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
//Some arguments were deleted with the VMap. Copy arguments one by one
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
- Anew->addAttr(OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1));
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs = OldFunc->getAttributes()
+ .getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
NewFunc->setAttributes(NewFunc->getAttributes()
.addAttributes(NewFunc->getContext(),
AttributeSet::ReturnIndex,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a63d31d..681bf9c 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2789,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
return false;
// Turn all invokes that unwind here into calls and delete the basic block.
+ bool InvokeRequiresTableEntry = false;
+ bool Changed = false;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator());
+
+ if (II->hasFnAttr(Attribute::UWTable)) {
+ // Don't remove an `invoke' instruction if the ABI requires an entry into
+ // the table.
+ InvokeRequiresTableEntry = true;
+ continue;
+ }
+
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
+
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
@@ -2811,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
// Finally, delete the invoke instruction!
II->eraseFromParent();
+ Changed = true;
}
- // The landingpad is now unreachable. Zap it.
- BB->eraseFromParent();
- return true;
+ if (!InvokeRequiresTableEntry)
+ // The landingpad is now unreachable. Zap it.
+ BB->eraseFromParent();
+
+ return Changed;
}
bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
@@ -3944,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Load from null is undefined.
if (LoadInst *LI = dyn_cast<LoadInst>(Use))
- return LI->getPointerAddressSpace() == 0;
+ if (!LI->isVolatile())
+ return LI->getPointerAddressSpace() == 0;
// Store to null is undefined.
if (StoreInst *SI = dyn_cast<StoreInst>(Use))
- return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
+ if (!SI->isVolatile())
+ return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I;
}
return false;
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8ad566c..c231704 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,14 +15,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
@@ -797,8 +800,7 @@ struct StrToOpt : public LibCallOptimization {
if (isa<ConstantPointerNull>(EndPtr)) {
// With a null EndPtr, this function won't capture the main argument.
// It would be readonly too, except that it still may write to errno.
- CI->addAttribute(1, Attribute::get(Callee->getContext(),
- Attribute::NoCapture));
+ CI->addAttribute(1, Attribute::NoCapture);
}
return 0;
@@ -1672,67 +1674,16 @@ class LibCallSimplifierImpl {
const TargetLibraryInfo *TLI;
const LibCallSimplifier *LCS;
bool UnsafeFPShrink;
- StringMap<LibCallOptimization*> Optimizations;
-
- // Fortified library call optimizations.
- MemCpyChkOpt MemCpyChk;
- MemMoveChkOpt MemMoveChk;
- MemSetChkOpt MemSetChk;
- StrCpyChkOpt StrCpyChk;
- StpCpyChkOpt StpCpyChk;
- StrNCpyChkOpt StrNCpyChk;
-
- // String library call optimizations.
- StrCatOpt StrCat;
- StrNCatOpt StrNCat;
- StrChrOpt StrChr;
- StrRChrOpt StrRChr;
- StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp;
- StrCpyOpt StrCpy;
- StpCpyOpt StpCpy;
- StrNCpyOpt StrNCpy;
- StrLenOpt StrLen;
- StrPBrkOpt StrPBrk;
- StrToOpt StrTo;
- StrSpnOpt StrSpn;
- StrCSpnOpt StrCSpn;
- StrStrOpt StrStr;
-
- // Memory library call optimizations.
- MemCmpOpt MemCmp;
- MemCpyOpt MemCpy;
- MemMoveOpt MemMove;
- MemSetOpt MemSet;
// Math library call optimizations.
- UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP;
- CosOpt Cos; PowOpt Pow; Exp2Opt Exp2;
-
- // Integer library call optimizations.
- FFSOpt FFS;
- AbsOpt Abs;
- IsDigitOpt IsDigit;
- IsAsciiOpt IsAscii;
- ToAsciiOpt ToAscii;
-
- // Formatting and IO library call optimizations.
- PrintFOpt PrintF;
- SPrintFOpt SPrintF;
- FPrintFOpt FPrintF;
- FWriteOpt FWrite;
- FPutsOpt FPuts;
- PutsOpt Puts;
-
- void initOptimizations();
- void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
- void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt);
+ CosOpt Cos;
+ PowOpt Pow;
+ Exp2Opt Exp2;
public:
LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS,
bool UnsafeFPShrink = false)
- : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true),
- Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
+ : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) {
this->TD = TD;
this->TLI = TLI;
this->LCS = LCS;
@@ -1740,126 +1691,234 @@ public:
}
Value *optimizeCall(CallInst *CI);
+ LibCallOptimization *lookupOptimization(CallInst *CI);
+ bool hasFloatVersion(StringRef FuncName);
};
-void LibCallSimplifierImpl::initOptimizations() {
- // Fortified library call optimizations.
- Optimizations["__memcpy_chk"] = &MemCpyChk;
- Optimizations["__memmove_chk"] = &MemMoveChk;
- Optimizations["__memset_chk"] = &MemSetChk;
- Optimizations["__strcpy_chk"] = &StrCpyChk;
- Optimizations["__stpcpy_chk"] = &StpCpyChk;
- Optimizations["__strncpy_chk"] = &StrNCpyChk;
- Optimizations["__stpncpy_chk"] = &StrNCpyChk;
-
- // String library call optimizations.
- addOpt(LibFunc::strcat, &StrCat);
- addOpt(LibFunc::strncat, &StrNCat);
- addOpt(LibFunc::strchr, &StrChr);
- addOpt(LibFunc::strrchr, &StrRChr);
- addOpt(LibFunc::strcmp, &StrCmp);
- addOpt(LibFunc::strncmp, &StrNCmp);
- addOpt(LibFunc::strcpy, &StrCpy);
- addOpt(LibFunc::stpcpy, &StpCpy);
- addOpt(LibFunc::strncpy, &StrNCpy);
- addOpt(LibFunc::strlen, &StrLen);
- addOpt(LibFunc::strpbrk, &StrPBrk);
- addOpt(LibFunc::strtol, &StrTo);
- addOpt(LibFunc::strtod, &StrTo);
- addOpt(LibFunc::strtof, &StrTo);
- addOpt(LibFunc::strtoul, &StrTo);
- addOpt(LibFunc::strtoll, &StrTo);
- addOpt(LibFunc::strtold, &StrTo);
- addOpt(LibFunc::strtoull, &StrTo);
- addOpt(LibFunc::strspn, &StrSpn);
- addOpt(LibFunc::strcspn, &StrCSpn);
- addOpt(LibFunc::strstr, &StrStr);
-
- // Memory library call optimizations.
- addOpt(LibFunc::memcmp, &MemCmp);
- addOpt(LibFunc::memcpy, &MemCpy);
- addOpt(LibFunc::memmove, &MemMove);
- addOpt(LibFunc::memset, &MemSet);
+bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) {
+ LibFunc::Func Func;
+ SmallString<20> FloatFuncName = FuncName;
+ FloatFuncName += 'f';
+ if (TLI->getLibFunc(FloatFuncName, Func))
+ return TLI->has(Func);
+ return false;
+}
- // Math library call optimizations.
- addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP);
- addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP);
- addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP);
- addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP);
- addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP);
- addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP);
- addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP);
-
- if(UnsafeFPShrink) {
- addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP);
- addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP);
+// Fortified library call optimizations.
+static MemCpyChkOpt MemCpyChk;
+static MemMoveChkOpt MemMoveChk;
+static MemSetChkOpt MemSetChk;
+static StrCpyChkOpt StrCpyChk;
+static StpCpyChkOpt StpCpyChk;
+static StrNCpyChkOpt StrNCpyChk;
+
+// String library call optimizations.
+static StrCatOpt StrCat;
+static StrNCatOpt StrNCat;
+static StrChrOpt StrChr;
+static StrRChrOpt StrRChr;
+static StrCmpOpt StrCmp;
+static StrNCmpOpt StrNCmp;
+static StrCpyOpt StrCpy;
+static StpCpyOpt StpCpy;
+static StrNCpyOpt StrNCpy;
+static StrLenOpt StrLen;
+static StrPBrkOpt StrPBrk;
+static StrToOpt StrTo;
+static StrSpnOpt StrSpn;
+static StrCSpnOpt StrCSpn;
+static StrStrOpt StrStr;
+
+// Memory library call optimizations.
+static MemCmpOpt MemCmp;
+static MemCpyOpt MemCpy;
+static MemMoveOpt MemMove;
+static MemSetOpt MemSet;
+
+// Math library call optimizations.
+static UnaryDoubleFPOpt UnaryDoubleFP(false);
+static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
+
+ // Integer library call optimizations.
+static FFSOpt FFS;
+static AbsOpt Abs;
+static IsDigitOpt IsDigit;
+static IsAsciiOpt IsAscii;
+static ToAsciiOpt ToAscii;
+
+// Formatting and IO library call optimizations.
+static PrintFOpt PrintF;
+static SPrintFOpt SPrintF;
+static FPrintFOpt FPrintF;
+static FWriteOpt FWrite;
+static FPutsOpt FPuts;
+static PutsOpt Puts;
+
+LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ StringRef FuncName = Callee->getName();
+
+ // Next check for intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::pow:
+ return &Pow;
+ case Intrinsic::exp2:
+ return &Exp2;
+ default:
+ return 0;
+ }
}
- addOpt(LibFunc::cosf, &Cos);
- addOpt(LibFunc::cos, &Cos);
- addOpt(LibFunc::cosl, &Cos);
- addOpt(LibFunc::powf, &Pow);
- addOpt(LibFunc::pow, &Pow);
- addOpt(LibFunc::powl, &Pow);
- Optimizations["llvm.pow.f32"] = &Pow;
- Optimizations["llvm.pow.f64"] = &Pow;
- Optimizations["llvm.pow.f80"] = &Pow;
- Optimizations["llvm.pow.f128"] = &Pow;
- Optimizations["llvm.pow.ppcf128"] = &Pow;
- addOpt(LibFunc::exp2l, &Exp2);
- addOpt(LibFunc::exp2, &Exp2);
- addOpt(LibFunc::exp2f, &Exp2);
- Optimizations["llvm.exp2.ppcf128"] = &Exp2;
- Optimizations["llvm.exp2.f128"] = &Exp2;
- Optimizations["llvm.exp2.f80"] = &Exp2;
- Optimizations["llvm.exp2.f64"] = &Exp2;
- Optimizations["llvm.exp2.f32"] = &Exp2;
+ // Then check for known library functions.
+ if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) {
+ switch (Func) {
+ case LibFunc::strcat:
+ return &StrCat;
+ case LibFunc::strncat:
+ return &StrNCat;
+ case LibFunc::strchr:
+ return &StrChr;
+ case LibFunc::strrchr:
+ return &StrRChr;
+ case LibFunc::strcmp:
+ return &StrCmp;
+ case LibFunc::strncmp:
+ return &StrNCmp;
+ case LibFunc::strcpy:
+ return &StrCpy;
+ case LibFunc::stpcpy:
+ return &StpCpy;
+ case LibFunc::strncpy:
+ return &StrNCpy;
+ case LibFunc::strlen:
+ return &StrLen;
+ case LibFunc::strpbrk:
+ return &StrPBrk;
+ case LibFunc::strtol:
+ case LibFunc::strtod:
+ case LibFunc::strtof:
+ case LibFunc::strtoul:
+ case LibFunc::strtoll:
+ case LibFunc::strtold:
+ case LibFunc::strtoull:
+ return &StrTo;
+ case LibFunc::strspn:
+ return &StrSpn;
+ case LibFunc::strcspn:
+ return &StrCSpn;
+ case LibFunc::strstr:
+ return &StrStr;
+ case LibFunc::memcmp:
+ return &MemCmp;
+ case LibFunc::memcpy:
+ return &MemCpy;
+ case LibFunc::memmove:
+ return &MemMove;
+ case LibFunc::memset:
+ return &MemSet;
+ case LibFunc::cosf:
+ case LibFunc::cos:
+ case LibFunc::cosl:
+ return &Cos;
+ case LibFunc::powf:
+ case LibFunc::pow:
+ case LibFunc::powl:
+ return &Pow;
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ return &Exp2;
+ case LibFunc::ffs:
+ case LibFunc::ffsl:
+ case LibFunc::ffsll:
+ return &FFS;
+ case LibFunc::abs:
+ case LibFunc::labs:
+ case LibFunc::llabs:
+ return &Abs;
+ case LibFunc::isdigit:
+ return &IsDigit;
+ case LibFunc::isascii:
+ return &IsAscii;
+ case LibFunc::toascii:
+ return &ToAscii;
+ case LibFunc::printf:
+ return &PrintF;
+ case LibFunc::sprintf:
+ return &SPrintF;
+ case LibFunc::fprintf:
+ return &FPrintF;
+ case LibFunc::fwrite:
+ return &FWrite;
+ case LibFunc::fputs:
+ return &FPuts;
+ case LibFunc::puts:
+ return &Puts;
+ case LibFunc::ceil:
+ case LibFunc::fabs:
+ case LibFunc::floor:
+ case LibFunc::rint:
+ case LibFunc::round:
+ case LibFunc::nearbyint:
+ case LibFunc::trunc:
+ if (hasFloatVersion(FuncName))
+ return &UnaryDoubleFP;
+ return 0;
+ case LibFunc::acos:
+ case LibFunc::acosh:
+ case LibFunc::asin:
+ case LibFunc::asinh:
+ case LibFunc::atan:
+ case LibFunc::atanh:
+ case LibFunc::cbrt:
+ case LibFunc::cosh:
+ case LibFunc::exp:
+ case LibFunc::exp10:
+ case LibFunc::expm1:
+ case LibFunc::log:
+ case LibFunc::log10:
+ case LibFunc::log1p:
+ case LibFunc::log2:
+ case LibFunc::logb:
+ case LibFunc::sin:
+ case LibFunc::sinh:
+ case LibFunc::sqrt:
+ case LibFunc::tan:
+ case LibFunc::tanh:
+ if (UnsafeFPShrink && hasFloatVersion(FuncName))
+ return &UnsafeUnaryDoubleFP;
+ return 0;
+ case LibFunc::memcpy_chk:
+ return &MemCpyChk;
+ default:
+ return 0;
+ }
+ }
+
+ // Finally check for fortified library calls.
+ if (FuncName.endswith("_chk")) {
+ if (FuncName == "__memmove_chk")
+ return &MemMoveChk;
+ else if (FuncName == "__memset_chk")
+ return &MemSetChk;
+ else if (FuncName == "__strcpy_chk")
+ return &StrCpyChk;
+ else if (FuncName == "__stpcpy_chk")
+ return &StpCpyChk;
+ else if (FuncName == "__strncpy_chk")
+ return &StrNCpyChk;
+ else if (FuncName == "__stpncpy_chk")
+ return &StrNCpyChk;
+ }
+
+ return 0;
- // Integer library call optimizations.
- addOpt(LibFunc::ffs, &FFS);
- addOpt(LibFunc::ffsl, &FFS);
- addOpt(LibFunc::ffsll, &FFS);
- addOpt(LibFunc::abs, &Abs);
- addOpt(LibFunc::labs, &Abs);
- addOpt(LibFunc::llabs, &Abs);
- addOpt(LibFunc::isdigit, &IsDigit);
- addOpt(LibFunc::isascii, &IsAscii);
- addOpt(LibFunc::toascii, &ToAscii);
-
- // Formatting and IO library call optimizations.
- addOpt(LibFunc::printf, &PrintF);
- addOpt(LibFunc::sprintf, &SPrintF);
- addOpt(LibFunc::fprintf, &FPrintF);
- addOpt(LibFunc::fwrite, &FWrite);
- addOpt(LibFunc::fputs, &FPuts);
- addOpt(LibFunc::puts, &Puts);
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
- if (Optimizations.empty())
- initOptimizations();
-
- Function *Callee = CI->getCalledFunction();
- LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+ LibCallOptimization *LCO = lookupOptimization(CI);
if (LCO) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, TD, TLI, LCS, Builder);
@@ -1867,17 +1926,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
return 0;
}
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) {
- if (TLI->has(F))
- Optimizations[TLI->getName(F)] = Opt;
-}
-
-void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2,
- LibCallOptimization* Opt) {
- if (TLI->has(F1) && TLI->has(F2))
- Optimizations[TLI->getName(F1)] = Opt;
-}
-
LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
const TargetLibraryInfo *TLI,
bool UnsafeFPShrink) {
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 7636541..17900da 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -1771,7 +1771,7 @@ namespace {
size_t MaxDepth = DAG.lookup(IJ);
DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << DAG.size() << "\n");
// At this point the DAG has been constructed, but, may contain
@@ -2086,7 +2086,7 @@ namespace {
DEBUG(if (DebugPairSelection)
dbgs() << "BBV: found pruned DAG for pair {"
- << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ << *IJ.first << " <-> " << *IJ.second << "} of depth " <<
MaxDepth << " and size " << PrunedDAG.size() <<
" (effective size: " << EffSize << ")\n");
if (((TTI && !UseChainDepthWithTI) ||
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index f489393..930d9c4 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -79,6 +79,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -115,6 +116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128;
/// number of pointers. Notice that the check is quadratic!
static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// We use a metadata with this name to indicate that a scalar loop was
+/// vectorized and that we don't need to re-vectorize it if we run into it
+/// again.
+static const char*
+AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized";
+
namespace {
// Forward declarations.
@@ -138,10 +145,11 @@ class LoopVectorizationCostModel;
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, DataLayout *DL, unsigned VecWidth,
+ DominatorTree *DT, DataLayout *DL,
+ const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), VF(VecWidth),
- UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
OldInduction(0), WidenMap(UnrollFactor) {}
// Perform the actual loop widening (vectorization).
@@ -268,6 +276,9 @@ private:
DominatorTree *DT;
/// Data Layout.
DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
+
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
unsigned VF;
@@ -320,8 +331,9 @@ class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
DominatorTree *DT, TargetTransformInfo* TTI,
- AliasAnalysis* AA)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {}
+ AliasAnalysis *AA, TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
+ Induction(0) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -407,7 +419,7 @@ public:
/// Alias(Multi)Map stores the values (GEPs or underlying objects and their
/// respective Store/Load instruction(s) to calculate aliasing.
- typedef DenseMap<Value*, Instruction* > AliasMap;
+ typedef MapVector<Value*, Instruction* > AliasMap;
typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
/// Returns true if it is legal to vectorize this loop.
@@ -504,6 +516,8 @@ private:
TargetTransformInfo *TTI;
/// Alias Analysis.
AliasAnalysis *AA;
+ /// Target Library Info.
+ TargetLibraryInfo *TLI;
// --- vectorization state --- //
@@ -540,8 +554,8 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- DataLayout *DL)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {}
+ DataLayout *DL, const TargetLibraryInfo *TLI)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -614,6 +628,8 @@ private:
const TargetTransformInfo &TTI;
/// Target data layout information.
DataLayout *DL;
+ /// Target Library Info.
+ const TargetLibraryInfo *TLI;
};
/// The LoopVectorize Pass.
@@ -631,6 +647,7 @@ struct LoopVectorize : public LoopPass {
TargetTransformInfo *TTI;
DominatorTree *DT;
AliasAnalysis *AA;
+ TargetLibraryInfo *TLI;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -643,19 +660,20 @@ struct LoopVectorize : public LoopPass {
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -689,7 +707,7 @@ struct LoopVectorize : public LoopPass {
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -1147,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
assert(ExitBlock && "Must have an exit block");
+ // Mark the old scalar loop with metadata that tells us not to vectorize this
+ // loop again if we run into it.
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
+
// Some loops have a single integer induction variable, while other loops
// don't. One example is c++ iterators that often have multiple pointer
// induction variables. In the code below we also support a case where we
@@ -1438,34 +1461,108 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
}
}
-static bool
-isTriviallyVectorizableIntrinsic(Instruction *Inst) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (!II)
- return false;
- switch (II->getIntrinsicID()) {
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::fabs:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::pow:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return true;
- default:
- return false;
+static Intrinsic::ID
+getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
+ // If we have an intrinsic call, check if it is trivially vectorizable.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::log:
+ case Intrinsic::log10:
+ case Intrinsic::log2:
+ case Intrinsic::fabs:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::pow:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ return II->getIntrinsicID();
+ default:
+ return Intrinsic::not_intrinsic;
+ }
}
- return false;
+
+ if (!TLI)
+ return Intrinsic::not_intrinsic;
+
+ LibFunc::Func Func;
+ Function *F = CI->getCalledFunction();
+ // We're going to make assumptions on the semantics of the functions, check
+ // that the target knows that it's available in this environment.
+ if (!F || !TLI->getLibFunc(F->getName(), Func))
+ return Intrinsic::not_intrinsic;
+
+ // Otherwise check if we have a call to a function that can be turned into a
+ // vector intrinsic.
+ switch (Func) {
+ default:
+ break;
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::sinl:
+ return Intrinsic::sin;
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ case LibFunc::cosl:
+ return Intrinsic::cos;
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ return Intrinsic::exp;
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ return Intrinsic::exp2;
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::logl:
+ return Intrinsic::log;
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ case LibFunc::log10l:
+ return Intrinsic::log10;
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log2l:
+ return Intrinsic::log2;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ return Intrinsic::fabs;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ return Intrinsic::floor;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ return Intrinsic::ceil;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ return Intrinsic::trunc;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ return Intrinsic::rint;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ return Intrinsic::nearbyint;
+ case LibFunc::pow:
+ case LibFunc::powf:
+ case LibFunc::powl:
+ return Intrinsic::pow;
+ }
+
+ return Intrinsic::not_intrinsic;
}
/// This function translates the reduction kind to an LLVM binary operator.
@@ -1546,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// To do so, we need to generate the 'identity' vector and overide
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -1991,17 +2088,21 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
}
case Instruction::Call: {
- assert(isTriviallyVectorizableIntrinsic(it));
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ break;
+
Module *M = BB->getParent()->getParent();
- IntrinsicInst *II = cast<IntrinsicInst>(it);
- Intrinsic::ID ID = II->getIntrinsicID();
+ CallInst *CI = cast<CallInst>(it);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value*, 4> Args;
- for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) {
- VectorParts &Arg = getVectorValue(II->getArgOperand(i));
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
- Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) };
+ Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) };
Function *F = Intrinsic::getDeclaration(M, ID, Tys);
Entry[Part] = Builder.CreateCall(F, Args);
}
@@ -2138,6 +2239,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
+ // If we marked the scalar loop as "already vectorized" then no need
+ // to vectorize it again.
+ if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) {
+ DEBUG(dbgs() << "LV: This loop was vectorized before\n");
+ return false;
+ }
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2220,9 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}// end of PHI handling
- // We still don't handle functions.
+ // We still don't handle functions. However, we can ignore dbg intrinsic
+ // calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !isTriviallyVectorizableIntrinsic(it)) {
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
@@ -2638,6 +2747,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Is this a bin op ?
FoundBinOp |= !isa<PHINode>(Iter);
+ // Remember the current instruction.
+ Instruction *OldIter = Iter;
+
// For each of the *users* of iter.
for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
it != e; ++it) {
@@ -2663,7 +2775,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
if (isa<PHINode>(Iter) && isa<PHINode>(U) &&
U->getParent() != TheLoop->getHeader() &&
TheLoop->contains(U) &&
- Iter->getNumUses() > 1)
+ Iter->hasNUsesOrMore(2))
continue;
// We can't have multiple inside users.
@@ -2683,6 +2795,10 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
Iter = U;
}
+ // If all uses were skipped this can't be a reduction variable.
+ if (Iter == OldIter)
+ return false;
+
// We found a reduction var if we have reached the original
// phi node and we only have a single instruction with out-of-loop
// users.
@@ -3152,6 +3268,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Skip dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(it))
+ continue;
+
unsigned C = getInstructionCost(it, VF);
Cost += C;
DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
@@ -3218,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
Type *CondTy = SI->getCondition()->getType();
- if (ScalarCond)
+ if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
@@ -3305,13 +3425,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
- assert(isTriviallyVectorizableIntrinsic(I));
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- Type *RetTy = ToVectorTy(II->getType(), VF);
+ CallInst *CI = cast<CallInst>(I);
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Not an intrinsic call!");
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF));
- return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys);
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar