aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp56
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp98
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp19
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp149
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp67
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp60
-rw-r--r--lib/Transforms/IPO/Inliner.cpp45
-rw-r--r--lib/Transforms/IPO/Internalize.cpp10
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt2
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp4
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h52
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp42
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp91
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp360
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp19
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp226
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp23
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h30
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp29
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp285
-rw-r--r--lib/Transforms/Instrumentation/BlackList.cpp19
-rw-r--r--lib/Transforms/Instrumentation/BlackList.h58
-rw-r--r--lib/Transforms/Instrumentation/EdgeProfiling.cpp5
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp140
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp5
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp4
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp2
-rw-r--r--lib/Transforms/LLVMBuild.txt2
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/ObjCARC/CMakeLists.txt13
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp261
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.h79
-rw-r--r--lib/Transforms/ObjCARC/LLVMBuild.txt23
-rw-r--r--lib/Transforms/ObjCARC/Makefile15
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.cpp48
-rw-r--r--lib/Transforms/ObjCARC/ObjCARC.h389
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAPElim.cpp175
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp162
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h74
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp537
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCExpand.cpp128
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp2691
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCUtil.cpp241
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp177
-rw-r--r--lib/Transforms/ObjCARC/ProvenanceAnalysis.h80
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt1
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp12
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp29
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp6
-rw-r--r--lib/Transforms/Scalar/GVN.cpp57
-rw-r--r--lib/Transforms/Scalar/LICM.cpp13
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp1
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp177
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp11
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp4354
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp12
-rw-r--r--lib/Transforms/Scalar/SROA.cpp43
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp5
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp2
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp20
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp76
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp132
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp16
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp1
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp20
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp104
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp20
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp238
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp9
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp19
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp934
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp980
78 files changed, 8130 insertions, 6203 deletions
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index de1353e..2bb6e90 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory(Scalar)
add_subdirectory(IPO)
add_subdirectory(Vectorize)
add_subdirectory(Hello)
+add_subdirectory(ObjCARC)
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 385544a..e6fa4ed 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -514,14 +514,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Attribute - Keep track of the parameter attributes for the arguments
// that we are *not* promoting. For the ones that we do promote, the parameter
// attributes are lost
- SmallVector<AttributeWithIndex, 8> AttributesVec;
+ SmallVector<AttributeSet, 8> AttributesVec;
const AttributeSet &PAL = F->getAttributes();
// Add any return attributes.
- Attribute attrs = PAL.getRetAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
- attrs));
+ if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getRetAttributes()));
// First, determine the new argument list
unsigned ArgIndex = 1;
@@ -537,9 +536,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
Params.push_back(I->getType());
- Attribute attrs = PAL.getParamAttributes(ArgIndex);
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+ AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
+ if (attrs.hasAttributes(ArgIndex)) {
+ AttrBuilder B(attrs, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -591,10 +593,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
}
// Add any function attributes.
- attrs = PAL.getFnAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- attrs));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
+ PAL.getFnAttributes()));
Type *RetTy = FTy->getReturnType();
@@ -639,10 +640,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
const AttributeSet &CallPAL = CS.getAttributes();
// Add any return attributes.
- Attribute attrs = CallPAL.getRetAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
- attrs));
+ if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ CallPAL.getRetAttributes()));
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -653,10 +653,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
Args.push_back(*AI); // Unmodified argument
- Attribute Attrs = CallPAL.getParamAttributes(ArgIndex);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
-
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
} else if (ByValArgsToTransform.count(I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -715,16 +716,17 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
- Attribute Attrs = CallPAL.getParamAttributes(ArgIndex);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(ArgIndex)) {
+ AttrBuilder B(CallPAL, ArgIndex);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
// Add any function attributes.
- attrs = CallPAL.getFnAttributes();
- if (attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- attrs));
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index ff040e7..49ef1e7 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -272,14 +272,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Drop any attributes that were on the vararg arguments.
AttributeSet PAL = CS.getAttributes();
- if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
- SmallVector<AttributeWithIndex, 8> AttributesVec;
- for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
- AttributesVec.push_back(PAL.getSlot(i));
- Attribute FnAttrs = PAL.getFnAttributes();
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- FnAttrs));
+ if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+ SmallVector<AttributeSet, 8> AttributesVec;
+ for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+ AttributesVec.push_back(PAL.getSlotAttributes(i));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+ PAL.getFnAttributes()));
PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
}
@@ -351,7 +350,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
if (Fn.use_empty())
return false;
- llvm::SmallVector<unsigned, 8> UnusedArgs;
+ SmallVector<unsigned, 8> UnusedArgs;
for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
I != E; ++I) {
Argument *Arg = I;
@@ -697,15 +696,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
std::vector<Type*> Params;
// Set up to build a new list of parameter attributes.
- SmallVector<AttributeWithIndex, 8> AttributesVec;
+ SmallVector<AttributeSet, 8> AttributesVec;
const AttributeSet &PAL = F->getAttributes();
- // The existing function return attributes.
- Attribute RAttrs = PAL.getRetAttributes();
- Attribute FnAttrs = PAL.getFnAttributes();
-
// Find out the new return value.
-
Type *RetTy = FTy->getReturnType();
Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
@@ -759,22 +753,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
+ // The existing function return attributes.
+ AttributeSet RAttrs = PAL.getRetAttributes();
+
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
RAttrs =
- Attribute::get(NRetTy->getContext(), AttrBuilder(RAttrs).
- removeAttributes(Attribute::typeIncompatible(NRetTy)));
+ AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
else
- assert(!AttrBuilder(RAttrs).
- hasAttributes(Attribute::typeIncompatible(NRetTy)) &&
+ assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex) &&
"Return attributes no longer compatible?");
- if (RAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
- RAttrs));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -791,9 +792,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Get the original parameter attributes (skipping the first one, that is
// for the return value.
- Attribute Attrs = PAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+ if (PAL.hasAttributes(i + 1)) {
+ AttrBuilder B(PAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+ }
} else {
++NumArgumentsEliminated;
DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
@@ -801,9 +804,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
}
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- FnAttrs));
+ if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(F->getContext(),
+ PAL.getFnAttributes()));
// Reconstruct the AttributesList based on the vector we constructed.
AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
@@ -836,15 +839,18 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
const AttributeSet &CallPAL = CS.getAttributes();
// The call return attributes.
- Attribute RAttrs = CallPAL.getRetAttributes();
- Attribute FnAttrs = CallPAL.getFnAttributes();
+ AttributeSet RAttrs = CallPAL.getRetAttributes();
+
// Adjust in case the function was changed to return void.
RAttrs =
- Attribute::get(NF->getContext(), AttrBuilder(RAttrs).
- removeAttributes(Attribute::typeIncompatible(NF->getReturnType())));
- if (RAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
- RAttrs));
+ AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
+ AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NF->getReturnType(),
+ AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex));
+ if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops the varargs.
@@ -856,22 +862,26 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- Attribute Attrs = CallPAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- Attribute Attrs = CallPAL.getParamAttributes(i + 1);
- if (Attrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+ if (CallPAL.hasAttributes(i + 1)) {
+ AttrBuilder B(CallPAL, i + 1);
+ AttributesVec.
+ push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ }
}
- if (FnAttrs.hasAttributes())
- AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- FnAttrs));
+ if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+ CallPAL.getFnAttributes()));
// Reconstruct the AttributesList based on the vector we constructed.
AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index e9bc4ad..a75212a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -215,14 +215,13 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::ReadNone);
- F->removeAttribute(AttributeSet::FunctionIndex,
- Attribute::get(F->getContext(), B));
+ F->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex, B));
// Add in the new attribute.
- B.clear();
- B.addAttribute(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
F->addAttribute(AttributeSet::FunctionIndex,
- Attribute::get(F->getContext(), B));
+ ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
++NumReadOnly;
@@ -381,7 +380,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
A != E; ++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(Attribute::get(F->getContext(), B));
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -396,7 +395,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
- A->addAttr(Attribute::get(F->getContext(), B));
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
++NumNoCapture;
Changed = true;
} else {
@@ -431,7 +430,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
ArgumentSCC[0]->
Definition->
- addAttr(Attribute::get(ArgumentSCC[0]->Definition->getContext(), B));
+ addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(),
+ ArgumentSCC[0]->Definition->getArgNo() + 1,
+ B));
++NumNoCapture;
Changed = true;
}
@@ -473,7 +474,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(Attribute::get(A->getContext(), B));
+ A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index abd37c2..2b9d667 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -448,8 +448,8 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
- if (isAllocationFn(I, TLI))
- break;
+ if (isAllocationFn(I, TLI))
+ break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
@@ -1825,7 +1825,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
- GV->getThreadLocalMode());
+ GV->getThreadLocalMode(),
+ GV->getType()->getAddressSpace());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
@@ -1845,10 +1846,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
bool StoringOther = SI->getOperand(0) == OtherVal;
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
- if (StoringOther || SI->getOperand(0) == InitVal)
+ if (StoringOther || SI->getOperand(0) == InitVal) {
StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
StoringOther);
- else {
+ } else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
// bool.
@@ -1887,6 +1888,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
UI->eraseFromParent();
}
+ // Retain the name of the old global variable. People who are debugging their
+ // programs may expect these variables to be named the same.
+ NewGV->takeName(GV);
GV->eraseFromParent();
return true;
}
@@ -1989,7 +1993,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return Changed;
} else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
- DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+ DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
@@ -2067,12 +2071,12 @@ static void ChangeCalleesToFastCall(Function *F) {
static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- if (!Attrs.getSlot(i).Attrs.hasAttribute(Attribute::Nest))
+ unsigned Index = Attrs.getSlotIndex(i);
+ if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
continue;
// There can be only one.
- return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
- Attribute::get(C, Attribute::Nest));
+ return Attrs.removeAttribute(C, Index, Attribute::Nest);
}
return Attrs;
@@ -2584,24 +2588,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
while (1) {
Constant *InstResult = 0;
+ DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
- if (!SI->isSimple()) return false; // no volatile/atomic accesses.
+ if (!SI->isSimple()) {
+ DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
Constant *Ptr = getVal(SI->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
- if (!isSimpleEnoughPointerToCommit(Ptr))
+ DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+ }
+ if (!isSimpleEnoughPointerToCommit(Ptr)) {
// If this is too complex for us to commit, reject it.
+ DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
return false;
+ }
Constant *Val = getVal(SI->getOperand(0));
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
+ DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+ << "\n");
return false;
+ }
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
if (CE->getOpcode() == Instruction::BitCast) {
+ DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
// If we're evaluating a store through a bitcast, then we need
// to pull the bitcast off the pointer type and push it onto the
// stored value.
@@ -2630,6 +2648,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
} else {
+ DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+ "evaluate.\n");
return false;
}
}
@@ -2637,25 +2657,36 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we found compatible types, go ahead and push the bitcast
// onto the stored value.
Val = ConstantExpr::getBitCast(Val, NewTy);
+
+ DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
}
+ }
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
getVal(BO->getOperand(0)),
getVal(BO->getOperand(1)));
+ DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+ << "\n");
} else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
InstResult = ConstantExpr::getCompare(CI->getPredicate(),
getVal(CI->getOperand(0)),
getVal(CI->getOperand(1)));
+ DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+ << "\n");
} else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
InstResult = ConstantExpr::getCast(CI->getOpcode(),
getVal(CI->getOperand(0)),
CI->getType());
+ DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+ << "\n");
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
getVal(SI->getOperand(1)),
getVal(SI->getOperand(2)));
+ DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+ << "\n");
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
@@ -2665,41 +2696,70 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
InstResult =
ConstantExpr::getGetElementPtr(P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
+ DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+ << "\n");
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
- if (!LI->isSimple()) return false; // no volatile/atomic accesses.
+
+ if (!LI->isSimple()) {
+ DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+ return false; // no volatile/atomic accesses.
+ }
+
Constant *Ptr = getVal(LI->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ DEBUG(dbgs() << "Found a constant pointer expression, constant "
+ "folding: " << *Ptr << "\n");
+ }
InstResult = ComputeLoadResult(Ptr);
- if (InstResult == 0) return false; // Could not evaluate load.
+ if (InstResult == 0) {
+ DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+ "\n");
+ return false; // Could not evaluate load.
+ }
+
+ DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
- if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
+ if (AI->isArrayAllocation()) {
+ DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+ return false; // Cannot handle array allocs.
+ }
Type *Ty = AI->getType()->getElementType();
AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
InstResult = AllocaTmps.back();
+ DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
CallSite CS(CurInst);
// Debug info can safely be ignored here.
if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+ DEBUG(dbgs() << "Ignoring debug info.\n");
++CurInst;
continue;
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CS.getCalledValue())) return false;
+ if (isa<InlineAsm>(CS.getCalledValue())) {
+ DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+ return false;
+ }
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
- if (MSI->isVolatile()) return false;
+ if (MSI->isVolatile()) {
+ DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+ "intrinsic.\n");
+ return false;
+ }
Constant *Ptr = getVal(MSI->getDest());
Constant *Val = getVal(MSI->getValue());
Constant *DestVal = ComputeLoadResult(getVal(Ptr));
if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
// This memset is a no-op.
+ DEBUG(dbgs() << "Ignoring no-op memset.\n");
++CurInst;
continue;
}
@@ -2707,6 +2767,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
++CurInst;
continue;
}
@@ -2714,8 +2775,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
if (II->getIntrinsicID() == Intrinsic::invariant_start) {
// We don't insert an entry into Values, as it doesn't have a
// meaningful return value.
- if (!II->use_empty())
+ if (!II->use_empty()) {
+ DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
return false;
+ }
ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
Value *PtrArg = getVal(II->getArgOperand(1));
Value *Ptr = PtrArg->stripPointerCasts();
@@ -2723,20 +2786,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- TD->getTypeStoreSize(ElemTy))
+ TD->getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
+ DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+ << "\n");
+ } else {
+ DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+ "invariant.\n");
+ }
}
// Continue even if we do nothing.
++CurInst;
continue;
}
+
+ DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
return false;
}
// Resolve function pointers.
Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
- if (!Callee || Callee->mayBeOverridden())
+ if (!Callee || Callee->mayBeOverridden()) {
+ DEBUG(dbgs() << "Can not resolve function pointer.\n");
return false; // Cannot resolve.
+ }
SmallVector<Constant*, 8> Formals;
for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
@@ -2746,22 +2819,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If this is a function we can constant fold, do it.
if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
+ DEBUG(dbgs() << "Constant folded function call. Result: " <<
+ *InstResult << "\n");
} else {
+ DEBUG(dbgs() << "Can not constant fold function call.\n");
return false;
}
} else {
- if (Callee->getFunctionType()->isVarArg())
+ if (Callee->getFunctionType()->isVarArg()) {
+ DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
return false;
+ }
- Constant *RetVal;
+ Constant *RetVal = 0;
// Execute the call, if successful, use the return value.
ValueStack.push_back(new DenseMap<Value*, Constant*>);
- if (!EvaluateFunction(Callee, RetVal, Formals))
+ if (!EvaluateFunction(Callee, RetVal, Formals)) {
+ DEBUG(dbgs() << "Failed to evaluate function.\n");
return false;
+ }
delete ValueStack.pop_back_val();
InstResult = RetVal;
+
+ if (InstResult != NULL) {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
+ InstResult << "\n\n");
+ } else {
+ DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+ }
}
} else if (isa<TerminatorInst>(CurInst)) {
+ DEBUG(dbgs() << "Found a terminator instruction.\n");
+
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
if (BI->isUnconditional()) {
NextBB = BI->getSuccessor(0);
@@ -2787,13 +2876,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
NextBB = 0;
} else {
// invoke, unwind, resume, unreachable.
+ DEBUG(dbgs() << "Can not handle terminator.");
return false; // Cannot handle this terminator.
}
// We succeeded at evaluating this block!
+ DEBUG(dbgs() << "Successfully evaluated block.\n");
return true;
} else {
// Did not know how to evaluate this!
+ DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+ "\n");
return false;
}
@@ -2807,6 +2900,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
// If we just processed an invoke, we finished evaluating the block.
if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
NextBB = II->getNormalDest();
+ DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
return true;
}
@@ -2845,6 +2939,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
while (1) {
BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
if (!EvaluateBlock(CurInst, NextBB))
return false;
@@ -2924,6 +3020,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
}
break;
}
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
// We cannot simplify external ctor functions.
if (F->empty()) continue;
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 2971803..a0095da 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -30,35 +30,41 @@ using namespace llvm;
namespace {
- // AlwaysInliner only inlines functions that are mark as "always inline".
- class AlwaysInliner : public Inliner {
- InlineCostAnalyzer CA;
- public:
- // Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
- AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000,
- InsertLifetime) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
- static char ID; // Pass identification, replacement for typeid
- virtual InlineCost getInlineCost(CallSite CS);
-
- using llvm::Pass::doInitialization;
- using llvm::Pass::doFinalization;
-
- virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
- }
- virtual bool doInitialization(CallGraph &CG);
- };
+/// \brief Inliner pass which only handles "always inline" functions.
+class AlwaysInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ // Use extremely low threshold.
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ AlwaysInliner(bool InsertLifetime)
+ : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ virtual InlineCost getInlineCost(CallSite CS);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+
+ using llvm::Pass::doFinalization;
+ virtual bool doFinalization(CallGraph &CG) {
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+ }
+};
+
}
char AlwaysInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -89,15 +95,18 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
if (Callee && !Callee->isDeclaration() &&
Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::AlwaysInline) &&
- CA.isInlineViable(*Callee))
+ ICA->isInlineViable(*Callee))
return InlineCost::getAlways();
return InlineCost::getNever();
}
-// doInitialization - Initializes the vector of functions that have not
-// been annotated with the "always inline" attribute.
-bool AlwaysInliner::doInitialization(CallGraph &CG) {
- CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
- return false;
+bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
+}
+
+void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
}
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 9682923..a4f7026 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -28,29 +28,41 @@ using namespace llvm;
namespace {
- class SimpleInliner : public Inliner {
- InlineCostAnalyzer CA;
- public:
- SimpleInliner() : Inliner(ID) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
- SimpleInliner(int Threshold) : Inliner(ID, Threshold,
- /*InsertLifetime*/true) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
- static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, getInlineThreshold(CS));
- }
- using llvm::Pass::doInitialization;
- virtual bool doInitialization(CallGraph &CG);
- };
-}
+/// \brief Actual inliner pass implementation.
+///
+/// The common implementation of the inlining logic is shared between this
+/// inliner pass and the always inliner pass. The two passes use different cost
+/// analyses to determine when to inline.
+class SimpleInliner : public Inliner {
+ InlineCostAnalysis *ICA;
+
+public:
+ SimpleInliner() : Inliner(ID), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ SimpleInliner(int Threshold)
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+ initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) {
+ return ICA->getInlineCost(CS, getInlineThreshold(CS));
+ }
+
+ virtual bool runOnSCC(CallGraphSCC &SCC);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // end anonymous namespace
char SimpleInliner::ID = 0;
INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
INITIALIZE_PASS_END(SimpleInliner, "inline",
"Function Integration/Inlining", false, false)
@@ -60,10 +72,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
return new SimpleInliner(Threshold);
}
-// doInitialization - Initializes the vector of functions that have been
-// annotated with the noinline attribute.
-bool SimpleInliner::doInitialization(CallGraph &CG) {
- CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
- return false;
+bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
+ ICA = &getAnalysis<InlineCostAnalysis>();
+ return Inliner::runOnSCC(SCC);
}
+void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<InlineCostAnalysis>();
+ Inliner::getAnalysisUsage(AU);
+}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 2187a2a..663ddb7 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -72,6 +72,40 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
+ // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+ // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+ // clutter to the IR.
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectStrong);
+ AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
+ AttributeSet::FunctionIndex,
+ B);
+ AttributeSet CallerAttr = Caller->getAttributes(),
+ CalleeAttr = Callee->getAttributes();
+
+ if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectStrong);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Caller->addFnAttr(Attribute::StackProtect);
+}
+
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
@@ -91,16 +125,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (!InlineFunction(CS, IFI, InsertLifetime))
return false;
- // If the inlined function had a higher stack protection level than the
- // calling function, then bump up the caller's stack protection level.
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtectReq);
- else if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtect) &&
- !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtect);
+ AdjustCallerSSPLevel(Caller, Callee);
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 70d55b0..4bfab5b 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -50,6 +50,8 @@ namespace {
explicit InternalizePass();
explicit InternalizePass(ArrayRef<const char *> exportList);
void LoadFile(const char *Filename);
+ void ClearExportList();
+ void AddToExportList(const std::string &val);
virtual bool runOnModule(Module &M);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -97,6 +99,14 @@ void InternalizePass::LoadFile(const char *Filename) {
}
}
+void InternalizePass::ClearExportList() {
+ ExternalNames.clear();
+}
+
+void InternalizePass::AddToExportList(const std::string &val) {
+ ExternalNames.insert(val);
+}
+
bool InternalizePass::runOnModule(Module &M) {
CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index b18c915..124cbb6 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 6dc1773..47b2b51 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -214,6 +214,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createGVNPass()); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+
+ // BBVectorize may have significantly shortened a loop body; unroll again.
+ if (!DisableUnrollLoops)
+ MPM.add(createLoopUnrollPass());
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index d872f0c..73d9323 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -146,9 +146,11 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
Function *F = (*I)->getFunction();
const AttributeSet &PAL = F->getAttributes();
- const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0,
- Attribute::get(F->getContext(),
- NewAttributes));
+ const AttributeSet &NPAL =
+ PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex,
+ AttributeSet::get(F->getContext(),
+ AttributeSet::FunctionIndex,
+ NewAttributes));
if (PAL != NPAL) {
MadeChange = true;
F->setAttributes(NPAL);
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 959daa2..1f6a3a5e 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -27,7 +27,7 @@ namespace llvm {
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-
+
/// SelectPatternFlavor - We can match a variety of different patterns for
/// select operations.
enum SelectPatternFlavor {
@@ -36,7 +36,7 @@ enum SelectPatternFlavor {
SPF_SMAX, SPF_UMAX
//SPF_ABS - TODO.
};
-
+
/// getComplexity: Assign a complexity or rank value to LLVM Values...
/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static inline unsigned getComplexity(Value *V) {
@@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
-
+
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
/// to the instcombine worklist.
-class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
public:
InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
-
+
void InsertHelper(Instruction *I, const Twine &Name,
BasicBlock *BB, BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
Worklist.Add(I);
}
};
-
+
/// InstCombiner - The -instcombine pass.
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
@@ -85,7 +85,7 @@ public:
/// instructions into the worklist when they are created.
typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
-
+
static char ID; // Pass identification, replacement for typeid
InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
MinimizeSize = false;
@@ -94,7 +94,7 @@ public:
public:
virtual bool runOnFunction(Function &F);
-
+
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -211,11 +211,11 @@ public:
private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
- Value *dyn_castFNegVal(Value *V) const;
- Type *FindElementAtOffset(Type *Ty, int64_t Offset,
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+ Type *FindElementAtOffset(Type *Ty, int64_t Offset,
SmallVectorImpl<Value*> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
-
+
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
@@ -247,7 +247,7 @@ public:
return New;
}
- // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
+ // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
// debug loc.
//
Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
@@ -263,10 +263,10 @@ public:
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
-
+
// If we are replacing the instruction with itself, this must be in a
// segment of unreachable code, so just clobber the instruction.
- if (&I == V)
+ if (&I == V)
V = UndefValue::get(I.getType());
DEBUG(errs() << "IC: Replacing " << I << "\n"
@@ -296,13 +296,13 @@ public:
MadeIRChange = true;
return 0; // Don't do anything with FI
}
-
+
void ComputeMaskedBits(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0) const {
return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
}
-
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
unsigned Depth = 0) const {
return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
}
@@ -325,10 +325,10 @@ private:
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt& KnownZero, APInt& KnownOne,
unsigned Depth);
- bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt& KnownZero, APInt& KnownOne,
unsigned Depth=0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
@@ -336,15 +336,15 @@ private:
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne);
-
+
/// SimplifyDemandedInstructionBits - Inst is an integer instruction that
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
-
+
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt& UndefElts, unsigned Depth = 0);
-
+
// FoldOpIntoPhi - Given a binary operator, cast instruction, or select
// which has a PHI node as operand #0, see if we can fold the instruction
// into the PHI (which is only possible if all operands to the PHI are
@@ -360,10 +360,10 @@ private:
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-
+
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
-
+
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -382,8 +382,8 @@ private:
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
-
-
+
+
} // end namespace llvm.
#endif
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f07c58d..c6d60d6 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -66,10 +66,12 @@ namespace {
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
{ return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+ const APFloat *getFpValPtr(void) const
+ { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
const APFloat &getFpVal(void) const {
assert(IsFp && BufHasFpVal && "Incorret state");
- return *reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]);
+ return *getFpValPtr();
}
APFloat &getFpVal(void)
@@ -1248,6 +1250,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (SimplifyDemandedInstructionBits(I))
return &I;
+
+ // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+ if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+ // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+ if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
+ if (X->getType()->isIntegerTy(1))
+ return CastInst::CreateZExtOrBitCast(X, Op1->getType());
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c1e60d4..4332467 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1245,6 +1245,34 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
}
+ {
+ Value *X = 0;
+ bool OpsSwapped = false;
+ // Canonicalize SExt or Not to the LHS
+ if (match(Op1, m_SExt(m_Value())) ||
+ match(Op1, m_Not(m_Value()))) {
+ std::swap(Op0, Op1);
+ OpsSwapped = true;
+ }
+
+ // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+ if (match(Op0, m_SExt(m_Value(X))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op1->getType());
+ return SelectInst::Create(X, Op1, Zero);
+ }
+
+ // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+ if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+ X->getType()->getScalarType()->isIntegerTy(1)) {
+ Value *Zero = Constant::getNullValue(Op0->getType());
+ return SelectInst::Create(X, Zero, Op1);
+ }
+
+ if (OpsSwapped)
+ std::swap(Op0, Op1);
+ }
+
return Changed ? &I : 0;
}
@@ -2043,6 +2071,20 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Inner, C1);
}
+ // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+ // Since this OR statement hasn't been optimized further yet, we hope
+ // that this transformation will allow the new ORs to be optimized.
+ {
+ Value *X = 0, *Y = 0;
+ if (Op0->hasOneUse() && Op1->hasOneUse() &&
+ match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+ match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+ Value *orTrue = Builder->CreateOr(A, C);
+ Value *orFalse = Builder->CreateOr(B, D);
+ return SelectInst::Create(X, orTrue, orFalse);
+ }
+ }
+
return Changed ? &I : 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d17879b..64cd1bd 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1014,8 +1014,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- AttrBuilder RAttrs = CallerPAL.getRetAttributes();
- if (RAttrs.hasAttributes(Attribute::typeIncompatible(NewRetTy)))
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ if (RAttrs.
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex))
return false; // Attribute not compatible with transformed value.
}
@@ -1044,14 +1047,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- Attribute Attrs = CallerPAL.getParamAttributes(i + 1);
- if (AttrBuilder(Attrs).
- hasAttributes(Attribute::typeIncompatible(ParamTy)))
+ if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+ hasAttributes(AttributeFuncs::
+ typeIncompatible(ParamTy, i + 1), i + 1))
return false; // Attribute not compatible with transformed value.
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && Attrs.hasAttribute(Attribute::ByVal)) {
+ if (ParamTy != ActTy &&
+ CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+ Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
@@ -1100,11 +1105,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
- if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ unsigned Index = CallerPAL.getSlotIndex(i - 1);
+ if (Index <= FT->getNumParams())
break;
- Attribute PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+
// Check if it has an attribute that's incompatible with varargs.
- if (PAttrs.hasAttribute(Attribute::StructRet))
+ AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+ if (PAttrs.hasAttribute(Index, Attribute::StructRet))
return false;
}
@@ -1113,21 +1120,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// inserting cast instructions as necessary.
std::vector<Value*> Args;
Args.reserve(NumActualArgs);
- SmallVector<AttributeWithIndex, 8> attrVec;
+ SmallVector<AttributeSet, 8> attrVec;
attrVec.reserve(NumCommonArgs);
// Get any return attributes.
- AttrBuilder RAttrs = CallerPAL.getRetAttributes();
+ AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.removeAttributes(Attribute::typeIncompatible(NewRetTy));
+ RAttrs.
+ removeAttributes(AttributeFuncs::
+ typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+ AttributeSet::ReturnIndex);
// Add the new return attributes.
if (RAttrs.hasAttributes())
- attrVec.push_back(
- AttributeWithIndex::get(AttributeSet::ReturnIndex,
- Attribute::get(FT->getContext(), RAttrs)));
+ attrVec.push_back(AttributeSet::get(Caller->getContext(),
+ AttributeSet::ReturnIndex, RAttrs));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1141,9 +1150,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- Attribute PAttrs = CallerPAL.getParamAttributes(i + 1);
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+ PAttrs));
}
// If the function takes more arguments than the call was taking, add them
@@ -1168,23 +1178,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
// Add any parameter attributes.
- Attribute PAttrs = CallerPAL.getParamAttributes(i + 1);
+ AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+ attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+ PAttrs));
}
}
}
- Attribute FnAttrs = CallerPAL.getFnAttributes();
- if (FnAttrs.hasAttributes())
- attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- FnAttrs));
+ AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+ attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
- attrVec);
+ attrVec);
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1262,12 +1272,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
Type *NestTy = 0;
- Attribute NestAttr;
+ AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attribute::Nest)){
+ if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = NestAttrs.getParamAttributes(NestIdx);
@@ -1279,17 +1289,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
std::vector<Value*> NewArgs;
NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
- SmallVector<AttributeWithIndex, 8> NewAttrs;
+ SmallVector<AttributeSet, 8> NewAttrs;
NewAttrs.reserve(Attrs.getNumSlots() + 1);
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
// Add any result attributes.
- Attribute Attr = Attrs.getRetAttributes();
- if (Attr.hasAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
- Attr));
+ if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Attrs.getRetAttributes()));
{
unsigned Idx = 1;
@@ -1301,7 +1310,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
if (NestVal->getType() != NestTy)
NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
- NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ NestAttr));
}
if (I == E)
@@ -1309,20 +1319,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- Attr = Attrs.getParamAttributes(Idx);
- if (Attr.hasAttributes())
- NewAttrs.push_back
- (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+ AttributeSet Attr = Attrs.getParamAttributes(Idx);
+ if (Attr.hasAttributes(Idx)) {
+ AttrBuilder B(Attr, Idx);
+ NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+ Idx + (Idx >= NestIdx), B));
+ }
++Idx, ++I;
} while (1);
}
// Add any function attributes.
- Attr = Attrs.getFnAttributes();
- if (Attr.hasAttributes())
- NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
- Attr));
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+ Attrs.getFnAttributes()));
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5af4442..a960ab2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Scale = 0;
return ConstantInt::get(Val->getType(), 0);
}
-
+
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
// Cannot look past anything that might overflow.
OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
@@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Mul) {
// This value is scaled by 'RHS'.
Scale = RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
-
+
if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
+ // We have X+C. Check to see if we really have (X*C2)+C1,
// where C1 is divisible by C2.
unsigned SubScale;
- Value *SubVal =
+ Value *SubVal =
DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
Offset += RHS->getZExtValue();
Scale = SubScale;
@@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (!TD) return 0;
PointerType *PTy = cast<PointerType>(CI.getType());
-
+
BuilderTy AllocaBuilder(*Builder);
AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
@@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
+
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
@@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements);
}
-
+
if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off);
}
-
+
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
-
+
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
// will die soon.
@@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-/// EvaluateInDifferentType - Given an expression that
+/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
@@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
break;
- }
+ }
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// new.
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
-
+
// Otherwise, must be the same type of cast, so just reinsert a new one.
// This also handles the case of zext(trunc(x)) -> zext(x).
Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
@@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
Res = NPN;
break;
}
- default:
+ default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
}
-
+
Res->takeName(I);
return InsertNewInstWith(Res, *I);
}
@@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
+static Instruction::CastOps
isEliminableCastPair(
const CastInst *CI, ///< The first cast instruction
unsigned opcode, ///< The opcode of the second cast instruction
@@ -253,7 +253,7 @@ isEliminableCastPair(
if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
(Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
Res = 0;
-
+
return Instruction::CastOps(Res);
}
@@ -265,18 +265,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
Type *Ty) {
// Noop casts and casts of constants should be eliminated trivially.
if (V->getType() == Ty || isa<Constant>(V)) return false;
-
+
// If this is another cast that can be eliminated, we prefer to have it
// eliminated.
if (const CastInst *CI = dyn_cast<CastInst>(V))
if (isEliminableCastPair(CI, opc, Ty, TD))
return false;
-
+
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
return false;
-
+
return true;
}
@@ -288,7 +288,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// Many cases of "cast of a cast" are eliminable. If it's eliminable we just
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
+ if (Instruction::CastOps opc =
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
@@ -311,7 +311,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
}
-
+
return 0;
}
@@ -330,15 +330,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// We can always evaluate constants in another type.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
Type *OrigTy = V->getType();
-
+
// If this is an extension from the dest type, we can eliminate it, even if it
// has multiple uses.
- if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+ if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
I->getOperand(0)->getType() == Ty)
return true;
@@ -423,29 +423,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType(), *SrcTy = Src->getType();
-
+
// Attempt to truncate the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
-
+
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -462,7 +462,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
Value *A = 0; ConstantInt *Cst = 0;
if (Src->hasOneUse() &&
@@ -472,7 +472,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// ASize < MidSize and MidSize > ResultSize, but don't know the relation
// between ASize and ResultSize.
unsigned ASize = A->getType()->getPrimitiveSizeInBits();
-
+
// If the shift amount is larger than the size of A, then the result is
// known to be zero because all the input bits got shifted out.
if (Cst->getZExtValue() >= ASize)
@@ -485,7 +485,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Shift->takeName(Src);
return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
}
-
+
// Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
// type isn't non-native.
if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) &&
@@ -508,7 +508,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
const APInt &Op1CV = Op1C->getValue();
-
+
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
@@ -538,14 +538,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C is some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
-
+
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoXform) return ICI;
@@ -559,7 +559,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Res = ConstantExpr::getZExt(Res, CI.getType());
return ReplaceInstUsesWith(CI, Res);
}
-
+
uint32_t ShiftAmt = KnownZeroMask.logBase2();
Value *In = ICI->getOperand(0);
if (ShiftAmt) {
@@ -568,12 +568,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt),
In->getName()+".lobit");
}
-
+
if ((Op1CV != 0) == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder->CreateXor(In, One);
}
-
+
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
@@ -646,19 +646,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
BitsToClear = 0;
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If the input is a truncate from the destination type, we can trivially
// eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
-
+
unsigned Opc = I->getOpcode(), Tmp;
switch (Opc) {
case Instruction::ZExt: // zext(zext(x)) -> zext(x).
@@ -678,7 +678,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
// These can all be promoted if neither operand has 'bits to clear'.
if (BitsToClear == 0 && Tmp == 0)
return true;
-
+
// If the operation is an AND/OR/XOR and the bits to clear are zero in the
// other side, BitsToClear is ok.
if (Tmp == 0 &&
@@ -691,10 +691,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
APInt::getHighBitsSet(VSize, BitsToClear)))
return true;
}
-
+
// Otherwise, we don't know how to analyze this BitsToClear case yet.
return false;
-
+
case Instruction::LShr:
// We can promote lshr(x, cst) if we can promote x. This requires the
// ultimate 'and' to clear out the high zero bits we're clearing out though.
@@ -716,7 +716,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
Tmp != BitsToClear)
return false;
return true;
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -739,48 +739,48 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
}
Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
- // If this zero extend is only used by a truncate, let the truncate by
+ // If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
// If one of the common conversions will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
return Result;
- // See if we can simplify any instructions used by the input whose sole
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
-
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
- CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
+ CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
-
+
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
" to avoid zero extend: " << CI);
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
-
+
uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// If the high bits are already filled with zeros, just replace this
// cast with the result.
if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize,
DestBitSize-SrcBitsKept)))
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit an AND to clear the high bits.
Constant *C = ConstantInt::get(Res->getType(),
APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
@@ -792,7 +792,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// 'and' which will be much cheaper than the pair of casts.
if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
// TODO: Subsume this into EvaluateInDifferentType.
-
+
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
@@ -809,7 +809,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");
return new ZExtInst(And, CI.getType());
}
-
+
if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (SrcSize > DstSize) {
Value *Trunc = Builder->CreateTrunc(A, CI.getType());
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc,
+ return BinaryOperator::CreateAnd(Trunc,
ConstantInt::get(Trunc->getType(),
AndValue));
}
@@ -876,7 +876,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
Value *New = Builder->CreateZExt(X, CI.getType());
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
-
+
return 0;
}
@@ -989,14 +989,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// If this is a constant, it can be trivially promoted.
if (isa<Constant>(V))
return true;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
-
+
// If this is a truncate from the dest type, we can trivially eliminate it.
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
return true;
-
+
// We can't extend or shrink something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
if (!I->hasOneUse()) return false;
@@ -1015,14 +1015,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// These operators can all arbitrarily be extended if their inputs can.
return CanEvaluateSExtd(I->getOperand(0), Ty) &&
CanEvaluateSExtd(I->getOperand(1), Ty);
-
+
//case Instruction::Shl: TODO
//case Instruction::LShr: TODO
-
+
case Instruction::Select:
return CanEvaluateSExtd(I->getOperand(1), Ty) &&
CanEvaluateSExtd(I->getOperand(2), Ty);
-
+
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
// get into trouble with cyclic PHIs here because we only consider
@@ -1036,24 +1036,24 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// TODO: Can handle more cases here.
break;
}
-
+
return false;
}
Instruction *InstCombiner::visitSExt(SExtInst &CI) {
- // If this sign extend is only used by a truncate, let the truncate by
- // eliminated before we try to optimize this zext.
+ // If this sign extend is only used by a truncate, let the truncate be
+ // eliminated before we try to optimize this sext.
if (CI.hasOneUse() && isa<TruncInst>(CI.use_back()))
return 0;
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
-
- // See if we can simplify any instructions used by the input whose sole
+
+ // See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
return &CI;
-
+
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -1076,7 +1076,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// cast with the result.
if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize)
return ReplaceInstUsesWith(CI, Res);
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"),
@@ -1089,7 +1089,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
+
// We need to emit a shl + ashr to do the sign extend.
Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
@@ -1125,7 +1125,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
A = Builder->CreateShl(A, ShAmtV, CI.getName());
return BinaryOperator::CreateAShr(A, ShAmtV);
}
-
+
return 0;
}
@@ -1147,7 +1147,7 @@ static Value *LookThroughFPExtensions(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::FPExt)
return LookThroughFPExtensions(I->getOperand(0));
-
+
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
@@ -1166,14 +1166,14 @@ static Value *LookThroughFPExtensions(Value *V) {
return V;
// Don't try to shrink to various long double types.
}
-
+
return V;
}
Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
-
+
// If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are
// smaller than the destination type, we can eliminate the truncate by doing
// the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well
@@ -1190,7 +1190,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Type *SrcTy = OpI->getType();
Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
- if (LHSTrunc->getType() != SrcTy &&
+ if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
@@ -1202,10 +1202,36 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
}
}
- break;
+ break;
+ }
+
+ // (fptrunc (fneg x)) -> (fneg (fptrunc x))
+ if (BinaryOperator::isFNeg(OpI)) {
+ Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1),
+ CI.getType());
+ return BinaryOperator::CreateFNeg(InnerTrunc);
+ }
+ }
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::fabs: {
+ // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+ Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
+ CI.getType());
+ Type *IntrinsicType[] = { CI.getType() };
+ Function *Overload =
+ Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(),
+ II->getIntrinsicID(), IntrinsicType);
+
+ Value *Args[] = { InnerTrunc };
+ return CallInst::Create(Overload, Args, II->getName());
+ }
}
}
-
+
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
@@ -1220,7 +1246,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Arg->getOperand(0)->getType()->isFloatTy()) {
Function *Callee = Call->getCalledFunction();
Module *M = CI.getParent()->getParent()->getParent();
- Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
Callee->getAttributes(),
Builder->getFloatTy(),
Builder->getFloatTy(),
@@ -1228,15 +1254,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
"sqrtfcall");
ret->setAttributes(Callee->getAttributes());
-
-
+
+
// Remove the old Call. With -fmath-errno, it won't get marked readnone.
ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
EraseInstFromFunction(*Call);
return ret;
}
}
-
+
return 0;
}
@@ -1254,7 +1280,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
@@ -1268,19 +1294,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (OpI == 0)
return commonCastTransforms(FI);
-
+
// fptosi(sitofp(X)) --> X
// fptosi(uitofp(X)) --> X
// This is safe if the intermediate type has enough bits in its mantissa to
// accurately represent all values of X. For example, do not do this with
// i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
+ // 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
(int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
-
+
return commonCastTransforms(FI);
}
@@ -1296,21 +1322,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
- if (TD) {
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateTrunc(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new IntToPtrInst(P, CI.getType());
- }
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
- TD->getPointerSizeInBits()) {
- Value *P = Builder->CreateZExt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new IntToPtrInst(P, CI.getType());
- }
+ if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
-
+
if (Instruction *I = commonCastTransforms(CI))
return I;
@@ -1320,19 +1341,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
-
+
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
if (GEP->hasAllZeroIndices()) {
// Changing the cast operand is usually not a good idea but it is safe
- // here because the pointer operand is being replaced with another
+ // here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
Worklist.Add(GEP);
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
+
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
@@ -1353,15 +1374,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
Builder->CreateGEP(OrigBase, NewIndices);
NGEP->takeName(GEP);
-
+
if (isa<BitCastInst>(CI))
return new BitCastInst(NGEP, CI.getType());
assert(isa<PtrToIntInst>(CI));
return new PtrToIntInst(NGEP, CI.getType());
- }
+ }
}
}
-
+
return commonCastTransforms(CI);
}
@@ -1369,19 +1390,15 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (TD) {
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new TruncInst(P, CI.getType());
- }
- if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
- Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
- TD->getIntPtrType(CI.getContext()));
- return new ZExtInst(P, CI.getType());
- }
+ if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) {
+ Type *Ty = TD->getIntPtrType(CI.getContext());
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty);
+ return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false);
}
-
+
return commonPointerCastTransforms(CI);
}
@@ -1396,33 +1413,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// element size, or the input is a multiple of the output element size.
// Convert the input type to have the same element type as the output.
VectorType *SrcTy = cast<VectorType>(InVal->getType());
-
+
if (SrcTy->getElementType() != DestTy->getElementType()) {
// The input types don't need to be identical, but for now they must be the
// same size. There is no specific reason we couldn't handle things like
// <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
- // there yet.
+ // there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
return 0;
-
+
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
}
-
+
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
-
+
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
ShuffleMask.push_back(i);
-
+
} else {
// If we're increasing the number of elements, shuffle in all of the
// elements from InVal and fill the rest of the result elements with zeros
@@ -1436,7 +1453,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
ShuffleMask.push_back(SrcElts);
}
-
+
return new ShuffleVectorInst(InVal, V2,
ConstantDataVector::get(V2->getContext(),
ShuffleMask));
@@ -1463,7 +1480,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
Type *VecEltTy) {
// Undef values never contribute useful bits to the result.
if (isa<UndefValue>(V)) return true;
-
+
// If we got down to a value of the right type, we win, try inserting into the
// right element.
if (V->getType() == VecEltTy) {
@@ -1471,15 +1488,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (Constant *C = dyn_cast<Constant>(V))
if (C->isNullValue())
return true;
-
+
// Fail if multiple elements are inserted into this slot.
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
return false;
-
+
Elements[ElementIndex] = V;
return true;
}
-
+
if (Constant *C = dyn_cast<Constant>(V)) {
// Figure out the # elements this provides, and bitcast it or slice it up
// as required.
@@ -1490,7 +1507,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
ElementIndex, Elements, VecEltTy);
-
+
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
if (!isa<IntegerType>(C->getType()))
@@ -1498,7 +1515,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
C->getType()->getPrimitiveSizeInBits()));
unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
-
+
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
i*ElementSize));
@@ -1508,23 +1525,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
}
return true;
}
-
+
if (!V->hasOneUse()) return false;
-
+
Instruction *I = dyn_cast<Instruction>(V);
if (I == 0) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
return CollectInsertionElements(I->getOperand(0), ElementIndex,
- Elements, VecEltTy);
+ Elements, VecEltTy);
case Instruction::Or:
return CollectInsertionElements(I->getOperand(0), ElementIndex,
Elements, VecEltTy) &&
@@ -1536,11 +1553,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
if (CI == 0) return false;
if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
+
return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
Elements, VecEltTy);
}
-
+
}
}
@@ -1575,11 +1592,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] == 0) continue; // Unset element.
-
+
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
}
-
+
return Result;
}
@@ -1607,11 +1624,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
}
}
-
+
// bitcast(trunc(lshr(bitcast(somevector), cst))
ConstantInt *ShAmt = 0;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
@@ -1628,7 +1645,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
VecTy->getPrimitiveSizeInBits() / DestWidth);
VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
}
-
+
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1652,12 +1669,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
PointerType *SrcPTy = cast<PointerType>(SrcTy);
Type *DstElTy = DstPTy->getElementType();
Type *SrcElTy = SrcPTy->getElementType();
-
+
// If the address spaces don't match, don't eliminate the bitcast, which is
// required for changing types.
if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
return 0;
-
+
// If we are casting an alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
// There is no need to modify malloc calls because it is their bitcast that
@@ -1665,14 +1682,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
return V;
-
+
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
Constant *ZeroUInt =
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
- while (SrcElTy != DstElTy &&
+ while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
@@ -1685,7 +1702,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
-
+
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
@@ -1698,7 +1715,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
-
+
if (isa<IntegerType>(SrcTy)) {
// If this is a cast from an integer to vector, check to see if the input
// is a trunc or zext of a bitcast from vector. If so, we can replace all
@@ -1711,7 +1728,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
cast<VectorType>(DestTy), *this))
return I;
}
-
+
// If the input is an 'or' instruction, we may be doing shifts and ors to
// assemble the elements of the vector manually. Try to rip the code out
// and replace it with insertelements.
@@ -1721,18 +1738,29 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
- Value *Elem =
- Builder->CreateExtractElement(Src,
- Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
- return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ if (SrcVTy->getNumElements() == 1) {
+ // If our destination is not a vector, then make this a straight
+ // scalar-scalar cast.
+ if (!DestTy->isVectorTy()) {
+ Value *Elem =
+ Builder->CreateExtractElement(Src,
+ Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
+ return CastInst::Create(Instruction::BitCast, Elem, DestTy);
+ }
+
+ // Otherwise, see if our source is an insert. If so, then use the scalar
+ // component directly.
+ if (InsertElementInst *IEI =
+ dyn_cast<InsertElementInst>(CI.getOperand(0)))
+ return CastInst::Create(Instruction::BitCast, IEI->getOperand(1),
+ DestTy);
}
}
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitcast to a vector with the same # elts.
- if (SVI->hasOneUse() && DestTy->isVectorTy() &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1741,9 +1769,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If either of the operands is a cast from CI.getType(), then
// evaluating the shuffle in the casted destination's type will allow
// us to eliminate at least one cast.
- if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
+ if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) &&
Tmp->getOperand(0)->getType() == DestTy) ||
- ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
+ ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) &&
Tmp->getOperand(0)->getType() == DestTy)) {
Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
@@ -1753,7 +1781,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
}
-
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 40e559e..bad46b4 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1331,6 +1331,25 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
And, Constant::getNullValue(And->getType()));
}
+
+ // Transform (icmp pred iM (shl iM %v, N), CI)
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)))
+ // Transform the shl to a trunc if (trunc (CI>>N)) has no loss.
+ // This lets us get rid of the shift in favor of a trunc, which can be
+ // free on the target. It has the additional benefit of comparing to a
+ // smaller constant, which will be target friendly.
+ unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
+ if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
+ Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
+ Constant *NCI = ConstantExpr::getTrunc(
+ ConstantExpr::getAShr(RHS,
+ ConstantInt::get(RHS->getType(), Amt)),
+ NTy);
+ return new ICmpInst(ICI.getPredicate(),
+ Builder->CreateTrunc(LHSI->getOperand(0), NTy),
+ NCI);
+ }
+
break;
}
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d0f4392..8e4267f 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -377,6 +377,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD))
return ReplaceInstUsesWith(I, V);
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+
// Simplify mul instructions with a constant RHS.
if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
@@ -389,7 +391,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
return NV;
ConstantFP *C = dyn_cast<ConstantFP>(Op1);
- if (C && I.hasUnsafeAlgebra() && C->getValueAPF().isNormal()) {
+ if (C && AllowReassociate && C->getValueAPF().isNormal()) {
// Let MDC denote an expression in one of these forms:
// X * C, C/X, X/C, where C is a constant.
//
@@ -430,7 +432,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
BinaryOperator::CreateFAdd(M0, M1) :
BinaryOperator::CreateFSub(M0, M1);
Instruction *RI = cast<Instruction>(R);
- RI->setHasUnsafeAlgebra(true);
+ RI->copyFastMathFlags(&I);
return RI;
}
}
@@ -438,9 +440,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
- if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y
- if (Value *Op1v = dyn_castFNegVal(Op1))
- return BinaryOperator::CreateFMul(Op0v, Op1v);
// Under unsafe algebra do:
// X * log2(0.5*Y) = X*log2(Y) - X
@@ -469,36 +468,66 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
- // X * cond ? 1.0 : 0.0 => cond ? X : 0.0
- if (I.hasNoNaNs() && I.hasNoSignedZeros()) {
- Value *V0 = I.getOperand(0);
- Value *V1 = I.getOperand(1);
- Value *Cond, *SLHS, *SRHS;
- bool Match = false;
-
- if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) {
- Match = true;
- } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS),
- m_Value(SRHS)))) {
- Match = true;
- std::swap(V0, V1);
+ // Handle symmetric situation in a 2-iteration loop
+ Value *Opnd0 = Op0;
+ Value *Opnd1 = Op1;
+ for (int i = 0; i < 2; i++) {
+ bool IgnoreZeroSign = I.hasNoSignedZeros();
+ if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
+ Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
+ Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
+
+ // -X * -Y => X*Y
+ if (N1)
+ return BinaryOperator::CreateFMul(N0, N1);
+
+ if (Opnd0->hasOneUse()) {
+ // -X * Y => -(X*Y) (Promote negation as high as possible)
+ Value *T = Builder->CreateFMul(N0, Opnd1);
+ cast<Instruction>(T)->setDebugLoc(I.getDebugLoc());
+ Instruction *Neg = BinaryOperator::CreateFNeg(T);
+ if (I.getFastMathFlags().any()) {
+ cast<Instruction>(T)->copyFastMathFlags(&I);
+ Neg->copyFastMathFlags(&I);
+ }
+ return Neg;
+ }
}
- if (Match) {
- ConstantFP *C0 = dyn_cast<ConstantFP>(SLHS);
- ConstantFP *C1 = dyn_cast<ConstantFP>(SRHS);
-
- if (C0 && C1 &&
- ((C0->isZero() && C1->isExactlyValue(1.0)) ||
- (C1->isZero() && C0->isExactlyValue(1.0)))) {
- Value *T;
- if (C0->isZero())
- T = Builder->CreateSelect(Cond, SLHS, V1);
- else
- T = Builder->CreateSelect(Cond, V1, SRHS);
- return ReplaceInstUsesWith(I, T);
+ // (X*Y) * X => (X*X) * Y where Y != X
+ // The purpose is two-fold:
+ // 1) to form a power expression (of X).
+ // 2) potentially shorten the critical path: After transformation, the
+ // latency of the instruction Y is amortized by the expression of X*X,
+ // and therefore Y is in a "less critical" position compared to what it
+ // was before the transformation.
+ //
+ if (AllowReassociate) {
+ Value *Opnd0_0, *Opnd0_1;
+ if (Opnd0->hasOneUse() &&
+ match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
+ Value *Y = 0;
+ if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
+ Y = Opnd0_1;
+ else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
+ Y = Opnd0_0;
+
+ if (Y) {
+ Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1));
+ T->copyFastMathFlags(&I);
+ T->setDebugLoc(I.getDebugLoc());
+
+ Instruction *R = BinaryOperator::CreateFMul(T, Y);
+ R->copyFastMathFlags(&I);
+ return R;
+ }
}
}
+
+ if (!isa<Constant>(Op1))
+ std::swap(Opnd0, Opnd1);
+ else
+ break;
}
return Changed ? &I : 0;
@@ -784,21 +813,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return 0;
}
+/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a
+/// special FP value and:
+/// 1) 1/C is exact, or
+/// 2) reciprocal is allowed.
+/// If the conversion was successful, the simplified expression "X * 1/C" is
+/// returned; otherwise, NULL is returned.
+///
+static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
+ ConstantFP *Divisor,
+ bool AllowReciprocal) {
+ const APFloat &FpVal = Divisor->getValueAPF();
+ APFloat Reciprocal(FpVal.getSemantics());
+ bool Cvt = FpVal.getExactInverse(&Reciprocal);
+
+ if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
+ Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
+ (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
+ Cvt = !Reciprocal.isDenormal();
+ }
+
+ if (!Cvt)
+ return 0;
+
+ ConstantFP *R;
+ R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
+ return BinaryOperator::CreateFMul(Dividend, R);
+}
+
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyFDivInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ bool AllowReassociate = I.hasUnsafeAlgebra();
+ bool AllowReciprocal = I.hasAllowReciprocal();
+
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- const APFloat &Op1F = Op1C->getValueAPF();
-
- // If the divisor has an exact multiplicative inverse we can turn the fdiv
- // into a cheaper fmul.
- APFloat Reciprocal(Op1F.getSemantics());
- if (Op1F.getExactInverse(&Reciprocal)) {
- ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal);
- return BinaryOperator::CreateFMul(Op0, RFP);
+ if (AllowReassociate) {
+ ConstantFP *C1 = 0;
+ ConstantFP *C2 = Op1C;
+ Value *X;
+ Instruction *Res = 0;
+
+ if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) {
+ // (X*C1)/C2 => X * (C1/C2)
+ //
+ Constant *C = ConstantExpr::getFDiv(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal())
+ Res = BinaryOperator::CreateFMul(X, C);
+ } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) {
+ // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed]
+ //
+ Constant *C = ConstantExpr::getFMul(C1, C2);
+ const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
+ if (F.isNormal() && !F.isDenormal()) {
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ AllowReciprocal);
+ if (!Res)
+ Res = BinaryOperator::CreateFDiv(X, C);
+ }
+ }
+
+ if (Res) {
+ Res->setFastMathFlags(I.getFastMathFlags());
+ return Res;
+ }
+ }
+
+ // X / C => X * 1/C
+ if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal))
+ return T;
+
+ return 0;
+ }
+
+ if (AllowReassociate && isa<ConstantFP>(Op0)) {
+ ConstantFP *C1 = cast<ConstantFP>(Op0), *C2;
+ Constant *Fold = 0;
+ Value *X;
+ bool CreateDiv = true;
+
+ // C1 / (X*C2) => (C1/C2) / X
+ if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2))))
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) {
+ // C1 / (X/C2) => (C1*C2) / X
+ Fold = ConstantExpr::getFMul(C1, C2);
+ } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) {
+ // C1 / (C2/X) => (C1/C2) * X
+ Fold = ConstantExpr::getFDiv(C1, C2);
+ CreateDiv = false;
+ }
+
+ if (Fold) {
+ const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
+ if (FoldC.isNormal() && !FoldC.isDenormal()) {
+ Instruction *R = CreateDiv ?
+ BinaryOperator::CreateFDiv(Fold, X) :
+ BinaryOperator::CreateFMul(X, Fold);
+ R->setFastMathFlags(I.getFastMathFlags());
+ return R;
+ }
+ }
+ return 0;
+ }
+
+ if (AllowReassociate) {
+ Value *X, *Y;
+ Value *NewInst = 0;
+ Instruction *SimpR = 0;
+
+ if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // (X/Y) / Z => X / (Y*Z)
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) {
+ NewInst = Builder->CreateFMul(Y, Op1);
+ SimpR = BinaryOperator::CreateFDiv(X, NewInst);
+ }
+ } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) {
+ // Z / (X/Y) => Z*Y / X
+ //
+ if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) {
+ NewInst = Builder->CreateFMul(Op0, Y);
+ SimpR = BinaryOperator::CreateFDiv(NewInst, X);
+ }
+ }
+
+ if (NewInst) {
+ if (Instruction *T = dyn_cast<Instruction>(NewInst))
+ T->setDebugLoc(I.getDebugLoc());
+ SimpR->setFastMathFlags(I.getFastMathFlags());
+ return SimpR;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index dd7ea14..4f71db1 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "InstCombine.h"
+#include "llvm/Support/PatternMatch.h"
using namespace llvm;
+using namespace PatternMatch;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation. isConstant indicates whether we're
@@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
+ // Extract a value from a vector add operation with a constant zero.
+ Value *Val = 0; Constant *Con = 0;
+ if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
+ if (Con->getAggregateElement(EltNo)->isNullValue())
+ return FindScalarElement(Val, EltNo);
+ }
+
// Otherwise, we don't know.
return 0;
}
@@ -295,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
}
-
+
if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
return V;
}
-
+
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
@@ -595,12 +604,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// ShuffleVectorInst is equivalent to the original one.
for (unsigned i = 0; i < VWidth; ++i) {
int eltMask;
- if (Mask[i] == -1) {
+ if (Mask[i] < 0) {
// This element is an undef value.
eltMask = -1;
} else if (Mask[i] < (int)LHSWidth) {
// This element is from left hand side vector operand.
- //
+ //
// If LHS is going to be replaced (case 1, 2, or 4), calculate the
// new mask value for the element.
if (newLHS != LHS) {
@@ -609,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// with a -1 mask value.
if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
eltMask = -1;
- }
- else
+ } else
eltMask = Mask[i];
} else {
// This element is from right hand side vector operand
@@ -630,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
&& "should have been check above");
eltMask = -1;
}
- }
- else
+ } else
eltMask = Mask[i]-LHSWidth;
// If LHS's width is changed, shift the mask value accordingly.
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 57ed9e3..49efce5 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -19,20 +19,20 @@
#include "llvm/Support/raw_ostream.h"
namespace llvm {
-
+
/// InstCombineWorklist - This is the worklist management logic for
/// InstCombine.
class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
-
+
void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
public:
InstCombineWorklist() {}
-
+
bool isEmpty() const { return Worklist.empty(); }
-
+
/// Add - Add the specified instruction to the worklist if it isn't already
/// in it.
void Add(Instruction *I) {
@@ -41,12 +41,12 @@ public:
Worklist.push_back(I);
}
}
-
+
void AddValue(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Add(I);
}
-
+
/// AddInitialGroup - Add the specified batch of stuff in reverse order.
/// which should only be done when the worklist is empty and when the group
/// has no duplicates.
@@ -61,25 +61,25 @@ public:
Worklist.push_back(I);
}
}
-
+
// Remove - remove I from the worklist if it exists.
void Remove(Instruction *I) {
DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
if (It == WorklistMap.end()) return; // Not in worklist.
-
+
// Don't bother moving everything down, just null out the slot.
Worklist[It->second] = 0;
-
+
WorklistMap.erase(It);
}
-
+
Instruction *RemoveOne() {
Instruction *I = Worklist.back();
Worklist.pop_back();
WorklistMap.erase(I);
return I;
}
-
+
/// AddUsersToWorkList - When an instruction is simplified, add all users of
/// the instruction to the work lists because they might get more simplified
/// now.
@@ -89,18 +89,18 @@ public:
UI != UE; ++UI)
Add(cast<Instruction>(*UI));
}
-
-
+
+
/// Zap - check that the worklist is empty and nuke the backing store for
/// the map if it is large.
void Zap() {
assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
+
// Do an explicit clear, this shrinks the map if needed.
WorklistMap.clear();
}
};
-
+
} // end namespace llvm.
#endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 6f24cdd..c6115e3 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -162,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
return !Overflow;
}
+/// Conservatively clears subclassOptionalData after a reassociation or
+/// commutation. Fast-math flags are kept for floating-point operators, since
+/// they remain valid across the transformation.
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
+ FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I);
+ if (!FPMO) {
+ I.clearSubclassOptionalData();
+ return;
+ }
+
+ FastMathFlags FMF = I.getFastMathFlags();
+ I.clearSubclassOptionalData();
+ I.setFastMathFlags(FMF);
+}
+
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative:
//
@@ -219,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.clearSubclassOptionalData();
I.setHasNoSignedWrap(true);
} else {
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
}
Changed = true;
@@ -241,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, C);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -263,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, B);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -283,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, V);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
++NumReassoc;
continue;
@@ -310,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
I.setOperand(1, Folded);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
- I.clearSubclassOptionalData();
+ ClearSubclassDataAfterReassociation(I);
Changed = true;
continue;
@@ -516,8 +531,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
// instruction if the LHS is a constant negative zero (which is the 'negate'
// form).
//
-Value *InstCombiner::dyn_castFNegVal(Value *V) const {
- if (BinaryOperator::isFNeg(V))
+Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
+ if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
return BinaryOperator::getFNegArgument(V);
// Constants can be considered to be negated values if they can be folded.
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 9bd3239..6877475 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -16,7 +16,6 @@
#define DEBUG_TYPE "asan"
#include "llvm/Transforms/Instrumentation.h"
-#include "BlackList.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -36,6 +35,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InstVisitor.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
@@ -43,6 +43,7 @@
#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
@@ -53,7 +54,8 @@ using namespace llvm;
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
-static const uint64_t kDefaultShadowOffsetAndroid = 0;
+static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G.
+static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
@@ -63,11 +65,13 @@ static const char *kAsanModuleCtorName = "asan.module_ctor";
static const char *kAsanModuleDtorName = "asan.module_dtor";
static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
+static const char *kAsanReportLoadN = "__asan_report_load_n";
+static const char *kAsanReportStoreN = "__asan_report_store_n";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *kAsanInitName = "__asan_init";
+static const char *kAsanInitName = "__asan_init_v1";
static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -133,6 +137,9 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale",
cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset",
+ cl::desc("Use short immediate constant as the mapping offset for 64bit"),
+ cl::Hidden, cl::init(true));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
@@ -186,14 +193,53 @@ class SetOfDynamicallyInitializedGlobals {
SmallSet<GlobalValue*, 32> DynInitGlobals;
};
-static int MappingScale() {
- return ClMappingScale ? ClMappingScale : kDefaultShadowScale;
+/// This struct defines the shadow mapping using the rule:
+/// shadow = (mem >> Scale) ADD-or-OR Offset.
+struct ShadowMapping {
+ int Scale;
+ uint64_t Offset;
+ bool OrShadowOffset;
+};
+
+static ShadowMapping getShadowMapping(const Module &M, int LongSize,
+ bool ZeroBaseShadow) {
+ llvm::Triple TargetTriple(M.getTargetTriple());
+ bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64;
+ bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64;
+
+ ShadowMapping Mapping;
+
+ // OR-ing shadow offset is more efficient (at least on x86),
+ // but on ppc64 we have to use add since the shadow offset is not necessarily
+ // 1/8-th of the address space.
+ Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset;
+
+ Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 :
+ (LongSize == 32 ? kDefaultShadowOffset32 :
+ IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64);
+ if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) {
+ assert(LongSize == 64);
+ Mapping.Offset = kDefaultShort64bitShadowOffset;
+ }
+ if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) {
+ // Zero offset log is the special case.
+ Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog;
+ }
+
+ Mapping.Scale = kDefaultShadowScale;
+ if (ClMappingScale) {
+ Mapping.Scale = ClMappingScale;
+ }
+
+ return Mapping;
}
-static size_t RedzoneSize() {
+static size_t RedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
- return std::max(32U, 1U << MappingScale());
+ return std::max(32U, 1U << MappingScale);
}
/// AddressSanitizer: instrument the code in module to find memory bugs.
@@ -201,23 +247,27 @@ struct AddressSanitizer : public FunctionPass {
AddressSanitizer(bool CheckInitOrder = false,
bool CheckUseAfterReturn = false,
bool CheckLifetime = false,
- StringRef BlacklistFile = StringRef())
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
: FunctionPass(ID),
CheckInitOrder(CheckInitOrder || ClInitializers),
CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
CheckLifetime(CheckLifetime || ClCheckLifetime),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) {}
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
virtual const char *getPassName() const {
return "AddressSanitizerFunctionPass";
}
void instrumentMop(Instruction *I);
- void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
- Value *Addr, uint32_t TypeSize, bool IsWrite);
+ void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex);
+ bool IsWrite, size_t AccessSizeIndex,
+ Value *SizeArgument);
bool instrumentMemIntrinsic(MemIntrinsic *MI);
void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
Value *Size,
@@ -227,6 +277,7 @@ struct AddressSanitizer : public FunctionPass {
void createInitializerPoisonCalls(Module &M,
Value *FirstAddr, Value *LastAddr);
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void emitShadowMapping(Module &M, IRBuilder<> &IRB) const;
virtual bool doInitialization(Module &M);
static char ID; // Pass identification, replacement for typeid
@@ -240,18 +291,22 @@ struct AddressSanitizer : public FunctionPass {
bool CheckInitOrder;
bool CheckUseAfterReturn;
bool CheckLifetime;
+ SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
+
LLVMContext *C;
DataLayout *TD;
- uint64_t MappingOffset;
int LongSize;
Type *IntptrTy;
+ ShadowMapping Mapping;
Function *AsanCtorFunction;
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
- SmallString<64> BlacklistFile;
OwningPtr<BlackList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite.
+ Function *AsanErrorCallbackSized[2];
InlineAsm *EmptyAsm;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
@@ -261,11 +316,13 @@ struct AddressSanitizer : public FunctionPass {
class AddressSanitizerModule : public ModulePass {
public:
AddressSanitizerModule(bool CheckInitOrder = false,
- StringRef BlacklistFile = StringRef())
+ StringRef BlacklistFile = StringRef(),
+ bool ZeroBaseShadow = false)
: ModulePass(ID),
CheckInitOrder(CheckInitOrder || ClInitializers),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) {}
+ : BlacklistFile),
+ ZeroBaseShadow(ZeroBaseShadow) {}
bool runOnModule(Module &M);
static char ID; // Pass identification, replacement for typeid
virtual const char *getPassName() const {
@@ -278,14 +335,20 @@ class AddressSanitizerModule : public ModulePass {
bool ShouldInstrumentGlobal(GlobalVariable *G);
void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
Value *LastAddr);
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
bool CheckInitOrder;
SmallString<64> BlacklistFile;
+ bool ZeroBaseShadow;
+
OwningPtr<BlackList> BL;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
Type *IntptrTy;
LLVMContext *C;
DataLayout *TD;
+ ShadowMapping Mapping;
Function *AsanPoisonGlobals;
Function *AsanUnpoisonGlobals;
Function *AsanRegisterGlobals;
@@ -308,6 +371,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
LLVMContext *C;
Type *IntptrTy;
Type *IntptrPtrTy;
+ ShadowMapping Mapping;
SmallVector<AllocaInst*, 16> AllocaVec;
SmallVector<Instruction*, 8> RetVec;
@@ -332,7 +396,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
: F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C),
IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)),
- TotalStackSize(0), StackAlignment(1 << MappingScale()) {}
+ Mapping(ASan.Mapping),
+ TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {}
bool runOnFunction() {
if (!ClStack) return false;
@@ -411,6 +476,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
AI.getAllocatedType()->isSized());
}
+ size_t RedzoneSize() const {
+ return RedzoneSizeForScale(Mapping.Scale);
+ }
uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
Type *Ty = AI->getAllocatedType();
uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty);
@@ -439,9 +507,9 @@ INITIALIZE_PASS(AddressSanitizer, "asan",
false, false)
FunctionPass *llvm::createAddressSanitizerFunctionPass(
bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
- StringRef BlacklistFile) {
+ StringRef BlacklistFile, bool ZeroBaseShadow) {
return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
- CheckLifetime, BlacklistFile);
+ CheckLifetime, BlacklistFile, ZeroBaseShadow);
}
char AddressSanitizerModule::ID = 0;
@@ -449,8 +517,9 @@ INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass", false, false)
ModulePass *llvm::createAddressSanitizerModulePass(
- bool CheckInitOrder, StringRef BlacklistFile) {
- return new AddressSanitizerModule(CheckInitOrder, BlacklistFile);
+ bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) {
+ return new AddressSanitizerModule(CheckInitOrder, BlacklistFile,
+ ZeroBaseShadow);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -473,32 +542,30 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
- Shadow = IRB.CreateLShr(Shadow, MappingScale());
- if (MappingOffset == 0)
+ Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+ if (Mapping.Offset == 0)
return Shadow;
// (Shadow >> scale) | offset
- return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
- MappingOffset));
+ if (Mapping.OrShadowOffset)
+ return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+ else
+ return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
}
void AddressSanitizer::instrumentMemIntrinsicParam(
Instruction *OrigIns,
Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
+ IRBuilder<> IRB(InsertBefore);
+ if (Size->getType() != IntptrTy)
+ Size = IRB.CreateIntCast(Size, IntptrTy, false);
// Check the first byte.
- {
- IRBuilder<> IRB(InsertBefore);
- instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
- }
+ instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
// Check the last byte.
- {
- IRBuilder<> IRB(InsertBefore);
- Value *SizeMinusOne = IRB.CreateSub(
- Size, ConstantInt::get(Size->getType(), 1));
- SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
- }
+ IRB.SetInsertPoint(InsertBefore);
+ Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
+ instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
}
// Instrument memset/memmove/memcpy
@@ -577,14 +644,24 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
assert(OrigTy->isSized());
uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
- if (TypeSize != 8 && TypeSize != 16 &&
- TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
- // Ignore all unusual sizes.
- return;
- }
+ assert((TypeSize % 8) == 0);
+ // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
+ if (TypeSize == 8 || TypeSize == 16 ||
+ TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
+ // Instrument unusual size (but still multiple of 8).
+ // We can not do it with a single check, so we do 1-byte check for the first
+ // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+ // to report the actual access size.
IRBuilder<> IRB(I);
- instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ OrigPtrTy);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -600,10 +677,12 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
Instruction *AddressSanitizer::generateCrashCode(
Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex) {
+ bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
IRBuilder<> IRB(InsertBefore);
- CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
- Addr);
+ CallInst *Call = SizeArgument
+ ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
+ : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
// This EmptyAsm is required to avoid callback merge.
@@ -614,7 +693,7 @@ Instruction *AddressSanitizer::generateCrashCode(
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue,
uint32_t TypeSize) {
- size_t Granularity = 1 << MappingScale();
+ size_t Granularity = 1 << Mapping.Scale;
// Addr & (Granularity - 1)
Value *LastAccessedByte = IRB.CreateAnd(
AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
@@ -630,12 +709,14 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
}
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
- IRBuilder<> &IRB, Value *Addr,
- uint32_t TypeSize, bool IsWrite) {
+ Instruction *InsertBefore,
+ Value *Addr, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument) {
+ IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> MappingScale()));
+ *C, std::max(8U, TypeSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
@@ -644,7 +725,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
- size_t Granularity = 1 << MappingScale();
+ size_t Granularity = 1 << Mapping.Scale;
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
@@ -663,8 +744,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
}
- Instruction *Crash =
- generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
+ Instruction *Crash = generateCrashCode(
+ CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
@@ -782,7 +863,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
BL.reset(new BlackList(BlacklistFile));
if (BL->isIn(M)) return false;
C = &(M.getContext());
- IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits());
+ int LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
initializeCallbacks(M);
DynamicallyInitializedGlobals.Init(M);
@@ -819,12 +902,22 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
Value *FirstDynamic = 0, *LastDynamic = 0;
for (size_t i = 0; i < n; i++) {
+ static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
- size_t RZ = RedzoneSize();
- uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ));
+ uint64_t MinRZ = RedzoneSize();
+ // MinRZ <= RZ <= kMaxGlobalRedzone
+ // and trying to make RZ to be ~ 1/4 of SizeInBytes.
+ uint64_t RZ = std::max(MinRZ,
+ std::min(kMaxGlobalRedzone,
+ (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RightRedzoneSize = RZ;
+ // Round up to MinRZ
+ if (SizeInBytes % MinRZ)
+ RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
// Determine whether this global should be poisoned in initialization.
bool GlobalHasDynamicInitializer =
@@ -848,7 +941,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
M, NewTy, G->isConstant(), G->getLinkage(),
NewInitializer, "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
- NewGlobal->setAlignment(RZ);
+ NewGlobal->setAlignment(MinRZ);
Value *Indices2[2];
Indices2[0] = IRB.getInt32(0);
@@ -921,6 +1014,10 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
}
}
+ AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
@@ -930,6 +1027,23 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
/*hasSideEffects=*/true);
}
+void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const {
+ // Tell the values of mapping offset and scale to the run-time.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Offset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, Mapping.Scale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+}
+
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
@@ -955,41 +1069,10 @@ bool AddressSanitizer::doInitialization(Module &M) {
AsanInitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(AsanInitFunction);
- llvm::Triple targetTriple(M.getTargetTriple());
- bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;
-
- MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid :
- (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
- if (ClMappingOffsetLog >= 0) {
- if (ClMappingOffsetLog == 0) {
- // special case
- MappingOffset = 0;
- } else {
- MappingOffset = 1ULL << ClMappingOffsetLog;
- }
- }
-
-
- if (ClMappingOffsetLog >= 0) {
- // Tell the run-time the current values of mapping offset and scale.
- GlobalValue *asan_mapping_offset =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingOffset),
- kAsanMappingOffsetName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_offset, true);
- }
- if (ClMappingScale) {
- GlobalValue *asan_mapping_scale =
- new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
- ConstantInt::get(IntptrTy, MappingScale()),
- kAsanMappingScaleName);
- // Read the global, otherwise it may be optimized away.
- IRB.CreateLoad(asan_mapping_scale, true);
- }
+ Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow);
+ emitShadowMapping(M, IRB);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
-
return true;
}
@@ -1015,11 +1098,11 @@ bool AddressSanitizer::runOnFunction(Function &F) {
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
initializeCallbacks(*F.getParent());
- // If needed, insert __asan_init before checking for AddressSafety attr.
+ // If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AddressSafety))
+ Attribute::SanitizeAddress))
return false;
if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
@@ -1048,12 +1131,12 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
// ok, take it.
} else {
- if (CallInst *CI = dyn_cast<CallInst>(BI)) {
+ CallSite CS(BI);
+ if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CI->doesNotReturn()) {
- NoReturnCalls.push_back(CI);
- }
+ if (CS.doesNotReturn())
+ NoReturnCalls.push_back(CS.getInstruction());
}
continue;
}
@@ -1147,7 +1230,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
void FunctionStackPoisoner::poisonRedZones(
const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase,
bool DoPoison) {
- size_t ShadowRZSize = RedzoneSize() >> MappingScale();
+ size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale;
assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
Type *RZPtrTy = PointerType::get(RZTy, 0);
@@ -1178,13 +1261,13 @@ void FunctionStackPoisoner::poisonRedZones(
// Poison the partial redzone at right
Ptr = IRB.CreateAdd(
ShadowBase, ConstantInt::get(IntptrTy,
- (Pos >> MappingScale()) - ShadowRZSize));
+ (Pos >> Mapping.Scale) - ShadowRZSize));
size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes);
uint32_t Poison = 0;
if (DoPoison) {
PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
RedzoneSize(),
- 1ULL << MappingScale(),
+ 1ULL << Mapping.Scale,
kAsanStackPartialRedzoneMagic);
}
Value *PartialPoison = ConstantInt::get(RZTy, Poison);
@@ -1193,7 +1276,7 @@ void FunctionStackPoisoner::poisonRedZones(
// Poison the full redzone at right.
Ptr = IRB.CreateAdd(ShadowBase,
- ConstantInt::get(IntptrTy, Pos >> MappingScale()));
+ ConstantInt::get(IntptrTy, Pos >> Mapping.Scale));
bool LastAlloca = (i == AllocaVec.size() - 1);
Value *Poison = LastAlloca ? PoisonRight : PoisonMid;
IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 4fcbea4..927982d 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "BlackList.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -78,21 +78,21 @@ BlackList::BlackList(const StringRef Path) {
}
// Iterate through each of the prefixes, and create Regexs for them.
- for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
- I != E; ++I) {
+ for (StringMap<std::string>::const_iterator I = Regexps.begin(),
+ E = Regexps.end(); I != E; ++I) {
Entries[I->getKey()] = new Regex(I->getValue());
}
}
-bool BlackList::isIn(const Function &F) {
+bool BlackList::isIn(const Function &F) const {
return isIn(*F.getParent()) || inSection("fun", F.getName());
}
-bool BlackList::isIn(const GlobalVariable &G) {
+bool BlackList::isIn(const GlobalVariable &G) const {
return isIn(*G.getParent()) || inSection("global", G.getName());
}
-bool BlackList::isIn(const Module &M) {
+bool BlackList::isIn(const Module &M) const {
return inSection("src", M.getModuleIdentifier());
}
@@ -107,14 +107,15 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
return "<unknown type>";
}
-bool BlackList::isInInit(const GlobalVariable &G) {
+bool BlackList::isInInit(const GlobalVariable &G) const {
return (isIn(*G.getParent()) ||
inSection("global-init", G.getName()) ||
inSection("global-init-type", GetGVTypeString(G)));
}
-bool BlackList::inSection(const StringRef Section, const StringRef Query) {
- StringMap<Regex*>::iterator I = Entries.find(Section);
+bool BlackList::inSection(const StringRef Section,
+ const StringRef Query) const {
+ StringMap<Regex*>::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
Regex *FunctionRegex = I->getValue();
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
deleted file mode 100644
index ee18a98..0000000
--- a/lib/Transforms/Instrumentation/BlackList.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions or global
-// variables based on a user-supplied blacklist.
-//
-// The blacklist disables instrumentation of various functions and global
-// variables. Each line contains a prefix, followed by a wild card expression.
-// Empty lines and lines starting with "#" are ignored.
-// ---
-// # Blacklisted items:
-// fun:*_ZN4base6subtle*
-// global:*global_with_bad_access_or_initialization*
-// global-init:*global_with_initialization_issues*
-// global-init-type:*Namespace::ClassName*
-// src:file_with_tricky_code.cc
-// ---
-// Note that the wild card is in fact an llvm::Regex, but * is automatically
-// replaced with .*
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "llvm/ADT/StringMap.h"
-
-namespace llvm {
-class Function;
-class GlobalVariable;
-class Module;
-class Regex;
-class StringRef;
-
-class BlackList {
- public:
- BlackList(const StringRef Path);
- // Returns whether either this function or it's source file are blacklisted.
- bool isIn(const Function &F);
- // Returns whether either this global or it's source file are blacklisted.
- bool isIn(const GlobalVariable &G);
- // Returns whether this module is blacklisted by filename.
- bool isIn(const Module &M);
- // Returns whether a global should be excluded from initialization checking.
- bool isInInit(const GlobalVariable &G);
- private:
- StringMap<Regex*> Entries;
-
- bool inSection(const StringRef Section, const StringRef Query);
-};
-
-} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 0b18b4c..a2459fb 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -21,7 +21,6 @@
#include "llvm/Transforms/Instrumentation.h"
#include "ProfilingUtils.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
@@ -55,8 +54,8 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
bool EdgeProfiler::runOnModule(Module &M) {
Function *Main = M.getFunction("main");
if (Main == 0) {
- M.getContext().emitWarning("cannot insert edge profiling into a module"
- " with no main function");
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
return false; // No main, no instrumentation!
}
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 58d5801..80705af 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -71,7 +71,6 @@
#define DEBUG_TYPE "msan"
#include "llvm/Transforms/Instrumentation.h"
-#include "BlackList.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -91,6 +90,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -127,6 +127,10 @@ static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
cl::Hidden, cl::init(true));
+static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact",
+ cl::desc("exact handling of relational integer ICmp"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
cl::desc("store origin for clean (fully initialized) values"),
cl::Hidden, cl::init(false));
@@ -361,6 +365,9 @@ bool MemorySanitizer::doInitialization(Module &M) {
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
IRB.getInt32(TrackOrigins), "__msan_track_origins");
+ new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+ IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+
return true;
}
@@ -451,9 +458,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
- // If the store is volatile, add a check.
- if (I.isVolatile())
- insertCheck(Val, &I);
+
if (ClCheckAccessAddress)
insertCheck(Addr, &I);
@@ -574,7 +579,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
return IT;
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
- uint32_t EltSize = MS.TD->getTypeStoreSizeInBits(VT->getElementType());
+ uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType());
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
@@ -586,7 +591,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
- uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -847,7 +852,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Stores the corresponding shadow and (optionally) origin.
/// Optionally, checks that the store address is fully defined.
- /// Volatile stores check that the value being stored is fully defined.
void visitStoreInst(StoreInst &I) {
StoreList.push_back(&I);
}
@@ -1127,10 +1131,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *B = I.getOperand(1);
Value *Sa = getShadow(A);
Value *Sb = getShadow(B);
- if (A->getType()->isPointerTy())
- A = IRB.CreatePointerCast(A, MS.IntptrTy);
- if (B->getType()->isPointerTy())
- B = IRB.CreatePointerCast(B, MS.IntptrTy);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
// A == B <==> (C = A^B) == 0
// A != B <==> (C = A^B) != 0
// Sc = Sa | Sb
@@ -1152,6 +1159,73 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ /// \brief Build the lowest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Maximise the undefined shadow bit, minimize other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit);
+ } else {
+ // Minimize undefined bits.
+ return IRB.CreateAnd(A, IRB.CreateNot(Sa));
+ }
+ }
+
+ /// \brief Build the highest possible value of V, taking into account V's
+ /// uninitialized bits.
+ Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
+ bool isSigned) {
+ if (isSigned) {
+ // Split shadow into sign bit and other bits.
+ Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
+ Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
+ // Minimise the undefined shadow bit, maximise other undefined bits.
+ return
+ IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits);
+ } else {
+ // Maximize undefined bits.
+ return IRB.CreateOr(A, Sa);
+ }
+ }
+
+ /// \brief Instrument relational comparisons.
+ ///
+ /// This function does exact shadow propagation for all relational
+ /// comparisons of integers, pointers and vectors of those.
+ /// FIXME: output seems suboptimal when one of the operands is a constant
+ void handleRelationalComparisonExact(ICmpInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *A = I.getOperand(0);
+ Value *B = I.getOperand(1);
+ Value *Sa = getShadow(A);
+ Value *Sb = getShadow(B);
+
+ // Get rid of pointers and vectors of pointers.
+ // For ints (and vectors of ints), types of A and Sa match,
+ // and this is a no-op.
+ A = IRB.CreatePointerCast(A, Sa->getType());
+ B = IRB.CreatePointerCast(B, Sb->getType());
+
+ // Let [a0, a1] be the interval of possible values of A, taking into account
+ // its undefined bits. Let [b0, b1] be the interval of possible values of B.
+ // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
+ bool IsSigned = I.isSigned();
+ Value *S1 = IRB.CreateICmp(I.getPredicate(),
+ getLowestPossibleValue(IRB, A, Sa, IsSigned),
+ getHighestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *S2 = IRB.CreateICmp(I.getPredicate(),
+ getHighestPossibleValue(IRB, A, Sa, IsSigned),
+ getLowestPossibleValue(IRB, B, Sb, IsSigned));
+ Value *Si = IRB.CreateXor(S1, S2);
+ setShadow(&I, Si);
+ setOriginForNaryOp(I);
+ }
+
/// \brief Instrument signed relational comparisons.
///
/// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
@@ -1181,12 +1255,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void visitICmpInst(ICmpInst &I) {
- if (ClHandleICmp && I.isEquality())
+ if (!ClHandleICmp) {
+ handleShadowOr(I);
+ return;
+ }
+ if (I.isEquality()) {
handleEqualityComparison(I);
- else if (ClHandleICmp && I.isSigned() && I.isRelational())
+ return;
+ }
+
+ assert(I.isRelational());
+ if (ClHandleICmpExact) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+ if (I.isSigned()) {
handleSignedRelationalComparison(I);
- else
- handleShadowOr(I);
+ return;
+ }
+
+ assert(I.isUnsigned());
+ if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
+ handleRelationalComparisonExact(I);
+ return;
+ }
+
+ handleShadowOr(I);
}
void visitFCmpInst(FCmpInst &I) {
@@ -1458,8 +1552,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::ReadNone);
- Func->removeAttribute(AttributeSet::FunctionIndex,
- Attribute::get(Func->getContext(), B));
+ Func->removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(Func->getContext(),
+ AttributeSet::FunctionIndex,
+ B));
}
}
IRBuilder<> IRB(&I);
@@ -1498,6 +1594,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(A),
getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
assert(Size != 0 && Store != 0);
DEBUG(dbgs() << " Param:" << *Store << "\n");
ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
@@ -1774,7 +1871,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
// Unpoison the whole __va_list_tag.
// FIXME: magic ABI constants.
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */24, /* alignment */16, false);
+ /* size */24, /* alignment */8, false);
}
void visitVACopyInst(VACopyInst &I) {
@@ -1785,7 +1882,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
// Unpoison the whole __va_list_tag.
// FIXME: magic ABI constants.
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
- /* size */ 24, /* alignment */ 16, false);
+ /* size */24, /* alignment */8, false);
}
void finalizeInstrumentation() {
@@ -1850,8 +1947,9 @@ bool MemorySanitizer::runOnFunction(Function &F) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::ReadNone);
- F.removeAttribute(AttributeSet::FunctionIndex,
- Attribute::get(F.getContext(), B));
+ F.removeAttributes(AttributeSet::FunctionIndex,
+ AttributeSet::get(F.getContext(),
+ AttributeSet::FunctionIndex, B));
return Visitor.runOnFunction();
}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index c5a1fe9..b45aef6 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -76,8 +75,8 @@ inline static void printEdgeCounter(ProfileInfo::Edge e,
bool OptimalEdgeProfiler::runOnModule(Module &M) {
Function *Main = M.getFunction("main");
if (Main == 0) {
- M.getContext().emitWarning("cannot insert edge profiling into a module"
- " with no main function");
+ errs() << "WARNING: cannot insert edge profiling into a module"
+ << " with no main function!\n";
return false; // No main, no instrumentation!
}
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 358bbeb..7de7326 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -1345,8 +1345,8 @@ bool PathProfiler::runOnModule(Module &M) {
Main = M.getFunction("MAIN__");
if (!Main) {
- Context->emitWarning("cannot insert edge profiling into a module"
- " with no main function");
+ errs() << "WARNING: cannot insert path profiling into a module"
+ << " with no main function!\n";
return false;
}
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 29d2ece..f93c5ab 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -22,7 +22,6 @@
#define DEBUG_TYPE "tsan"
#include "llvm/Transforms/Instrumentation.h"
-#include "BlackList.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -41,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BlackList.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index f7bca06..15e9fba 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize
+subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC
[component_0]
type = Group
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index 8b1df92..c390517 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
new file mode 100644
index 0000000..233deb3
--- /dev/null
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -0,0 +1,13 @@
+add_llvm_library(LLVMObjCARCOpts
+ ObjCARC.cpp
+ ObjCARCOpts.cpp
+ ObjCARCExpand.cpp
+ ObjCARCAPElim.cpp
+ ObjCARCAliasAnalysis.cpp
+ ObjCARCUtil.cpp
+ ObjCARCContract.cpp
+ DependencyAnalysis.cpp
+ ProvenanceAnalysis.cpp
+ )
+
+add_dependencies(LLVMObjCARCOpts intrinsics_gen)
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
new file mode 100644
index 0000000..5aada9c
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -0,0 +1,261 @@
+//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-dependency"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/Support/CFG.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
+
+/// Test if there can be dependencies on Inst through Arg. This function only
+/// tests dependencies relevant for removing pairs of calls.
+bool
+llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
+ const Value *Arg, ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // These mark the end and beginning of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+}
+
+/// Walk up the CFG from StartInst (which is in StartBB) and find local and
+/// non-local dependencies on Arg.
+///
+/// TODO: Cache results?
+void
+llvm::objcarc::FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInsts,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInsts.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInsts.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInsts.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h
new file mode 100644
index 0000000..24d358b
--- /dev/null
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -0,0 +1,79 @@
+//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares special dependency analysis routines used in Objective C
+/// ARC Optimizations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+ class BasicBlock;
+ class Instruction;
+ class Value;
+}
+
+namespace llvm {
+namespace objcarc {
+
+class ProvenanceAnalysis;
+
+/// \enum DependenceKind
+/// \brief Defines different dependence kinds among various ARC constructs.
+///
+/// There are several kinds of dependence-like concepts in use here.
+///
+enum DependenceKind {
+ NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+};
+
+void FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA);
+
+bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA);
+
+/// Test whether the given instruction can "use" the given pointer's object in a
+/// way that requires the reference count to be positive.
+bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class);
+
+/// Test whether the given instruction can result in a reference count
+/// modification (positive or negative) for the pointer's object.
+bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class);
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H
diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt
new file mode 100644
index 0000000..90a2338
--- /dev/null
+++ b/lib/Transforms/ObjCARC/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ObjCARC
+parent = Transforms
+library_name = ObjCARCOpts
+required_libraries = Analysis Core Support TransformUtils
diff --git a/lib/Transforms/ObjCARC/Makefile b/lib/Transforms/ObjCARC/Makefile
new file mode 100644
index 0000000..2a34e21
--- /dev/null
+++ b/lib/Transforms/ObjCARC/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/ObjCARC/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMObjCARCOpts
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
new file mode 100644
index 0000000..53a31b0
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -0,0 +1,48 @@
+//===-- ObjCARC.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMObjCARCOpts.a, which
+// implements several scalar transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ class PassRegistry;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+bool llvm::objcarc::EnableARCOpts;
+static cl::opt<bool, true>
+EnableARCOptimizations("enable-objc-arc-opts",
+ cl::location(EnableARCOpts),
+ cl::init(true));
+
+/// initializeObjCARCOpts - Initialize all passes linked into the
+/// ObjCARCOpts library.
+void llvm::initializeObjCARCOpts(PassRegistry &Registry) {
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAPElimPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
+}
+
+void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) {
+ initializeObjCARCOpts(*unwrap(R));
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
new file mode 100644
index 0000000..e062b66
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -0,0 +1,389 @@
+//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines common definitions/declarations used by the ObjC ARC
+/// Optimizer. ARC stands for Automatic Reference Counting and is a system for
+/// managing reference counts for objects in Objective C.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/ObjCARC.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+namespace llvm {
+class raw_ostream;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief A handy option to enable/disable all ARC Optimizations.
+extern bool EnableARCOpts;
+
+/// \brief Test if the given module looks interesting to run ARC optimization
+/// on.
+static inline bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer");
+}
+
+/// \enum InstructionClass
+/// \brief A simple classification for instructions.
+enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_StoreStrong, ///< objc_storeStrong (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class);
+
+/// \brief Test if the given class is objc_retain or equivalent.
+static inline bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// \brief Test if the given class is objc_autorelease or equivalent.
+static inline bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which return their
+/// argument verbatim.
+static inline bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// \brief Test if the given class represents instructions which do nothing if
+/// passed a null pointer.
+static inline bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the "tail" keyword.
+static inline bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// \brief Test if the given class represents instructions which are never safe
+/// to mark with the "tail" keyword.
+static inline bool IsNeverTail(InstructionClass Class) {
+ /// It is never safe to tail call objc_autorelease since by tail calling
+ /// objc_autorelease, we also tail call -[NSObject autorelease] which supports
+ /// fast autoreleasing causing our object to be potentially reclaimed from the
+ /// autorelease pool which violates the semantics of __autoreleasing types in
+ /// ARC.
+ return Class == IC_Autorelease;
+}
+
+/// \brief Test if the given class represents instructions which are always safe
+/// to mark with the nounwind attribute.
+static inline bool IsNoThrow(InstructionClass Class) {
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// Test whether the given instruction can autorelease any pointer or cause an
+/// autoreleasepool pop.
+static inline bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// \brief Determine if F is one of the special known Functions. If it isn't,
+/// return IC_CallOrUser.
+InstructionClass GetFunctionClass(const Function *F);
+
+/// \brief Determine which objc runtime call instruction class V belongs to.
+///
+/// This is similar to GetInstructionClass except that it only detects objc
+/// runtime calls. This allows it to be faster.
+///
+static inline InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
+}
+
+/// \brief Determine what kind of construct V is.
+InstructionClass GetInstructionClass(const Value *V);
+
+/// \brief This is a wrapper around getUnderlyingObject which also knows how to
+/// look through objc_retain and objc_autorelease calls, which we know to return
+/// their argument verbatim.
+static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know to
+/// return their argument verbatim.
+static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static inline Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+static inline bool isNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static inline bool isNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+
+/// \brief Erase the given instruction.
+///
+/// Many ObjC calls return their argument verbatim,
+/// so if it's such a call and the return value has users, replace them with the
+/// argument value.
+///
+static inline void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// \brief Test whether the given value is possibly a retainable object pointer.
+static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+ // Pointers to static or stack storage are not valid retainable object
+ // pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments can not be a valid retainable object pointer.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types.
+ //
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never retainable object pointers, however clang occasionally
+ // bitcasts retainable object pointers to function-pointer type temporarily.
+ PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty)
+ return false;
+ // Conservatively assume anything else is a potential retainable object
+ // pointer.
+ return true;
+}
+
+static inline bool IsPotentialRetainableObjPtr(const Value *Op,
+ AliasAnalysis &AA) {
+ // First make the rudimentary check.
+ if (!IsPotentialRetainableObjPtr(Op))
+ return false;
+
+ // Objects in constant memory are not reference-counted.
+ if (AA.pointsToConstantMemory(Op))
+ return false;
+
+ // Pointers in constant memory are not pointing to reference-counted objects.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
+ if (AA.pointsToConstantMemory(LI->getPointerOperand()))
+ return false;
+
+ // Otherwise assume the worst.
+ return true;
+}
+
+/// \brief Helper for GetInstructionClass. Determines what kind of construct CS
+/// is.
+static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialRetainableObjPtr(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
+
+/// \brief Return true if this value refers to a distinct and identifiable
+/// object.
+///
+/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses
+/// special knowledge of ObjC conventions.
+static inline bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ // A constant pointer can't be pointing to an object on the heap. It may
+ // be reference-counted, but it won't be deleted.
+ if (GV->isConstant())
+ return true;
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
new file mode 100644
index 0000000..00d9864
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -0,0 +1,175 @@
+//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file implements optimizations which remove extraneous
+/// autorelease pools.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-ap-elim"
+#include "ObjCARC.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Autorelease pool elimination.
+ class ObjCARCAPElim : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnModule(Module &M);
+
+ static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
+ static bool OptimizeBB(BasicBlock *BB);
+
+ public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+ "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination",
+ false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+ return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+/// Interprocedurally determine if calls made by the given call site can
+/// possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
+ if (const Function *Callee = CS.getCalledFunction()) {
+ if (Callee->isDeclaration() || Callee->mayBeOverridden())
+ return true;
+ for (Function::const_iterator I = Callee->begin(), E = Callee->end();
+ I != E; ++I) {
+ const BasicBlock *BB = I;
+ for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
+ J != F; ++J)
+ if (ImmutableCallSite JCS = ImmutableCallSite(J))
+ // This recursion depth limit is arbitrary. It's just great
+ // enough to cover known interesting testcases.
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
+ "autorelease pair:\n"
+ " Pop: " << *Inst << "\n"
+ << " Push: " << *Push << "\n");
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(ImmutableCallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors. In theory, unnecessary autorelease
+ // pools could occur anywhere, but in practice it's pretty rare. Global
+ // ctors are a place where autorelease pools get inserted automatically,
+ // so it's pretty common for them to be unnecessary, and it's pretty
+ // profitable to eliminate them.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs where the second members
+ // are constructor functions.
+ Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // If the user used a constructor function with the wrong signature and
+ // it got bitcasted or whatever, look the other way.
+ if (!F)
+ continue;
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
new file mode 100644
index 0000000..46b2de7
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -0,0 +1,162 @@
+//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-aa"
+#include "ObjCARC.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassSupport.h"
+
+namespace llvm {
+ class Function;
+ class Value;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis(); // Heap-allocated; handed back as a raw pointer.
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll(); // This pass declares that it preserves every analysis.
+ AliasAnalysis::getAnalysisUsage(AU); // Then defer to the AliasAnalysis base.
+}
+
+/// Answer an alias query, looking through pointer casts and ObjC-specific
+/// no-op calls before consulting the base AliasAnalysis.
+///
+/// With ARC optimizations disabled the query is forwarded unchanged.
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+  if (!EnableARCOpts)
+    return AliasAnalysis::alias(LocA, LocB);
+
+  // Step 1: a precise query on the pointers with no-ops (including the
+  // ObjC-specific ones) stripped, keeping the original sizes and TBAA tags.
+  const Value *StrippedA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+  const Value *StrippedB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+  const Location PreciseA(StrippedA, LocA.Size, LocA.TBAATag);
+  const Location PreciseB(StrippedB, LocB.Size, LocB.TBAATag);
+  const AliasResult Precise = AliasAnalysis::alias(PreciseA, PreciseB);
+  if (Precise != MayAlias)
+    return Precise;
+
+  // Step 2: climb to the underlying objects, again through ObjC-specific
+  // no-ops. If that gets us nowhere new there is nothing more to ask. We
+  // don't need to chain here, since that's covered by the precise query.
+  const Value *BaseA = GetUnderlyingObjCPtr(StrippedA);
+  const Value *BaseB = GetUnderlyingObjCPtr(StrippedB);
+  if (BaseA == StrippedA && BaseB == StrippedB)
+    return MayAlias;
+
+  // Only a NoAlias answer is usable from the imprecise query: MustAlias and
+  // PartialAlias can't be trusted because GetUnderlyingObjCPtr may return a
+  // pointer at a different offset.
+  const AliasResult Imprecise =
+    AliasAnalysis::alias(Location(BaseA), Location(BaseB));
+  return Imprecise == NoAlias ? NoAlias : MayAlias;
+}
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise constant-memory query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise constant-memory query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // Nothing ARC-specific to add for call sites; chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+/// Report the mod/ref behavior of \p F, adding one ARC-specific answer on
+/// top of the base AliasAnalysis.
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+  // Functions classified as IC_NoopCast are reported as not accessing
+  // memory. This answer is only given while ARC optimizations are enabled;
+  // the classification is not even computed otherwise.
+  if (EnableARCOpts && GetFunctionClass(F) == IC_NoopCast)
+    return DoesNotAccessMemory;
+
+  // Everything else is left to the base AliasAnalysis.
+  return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts) // ARC knowledge is switched off: forward the query as-is.
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ // Note that this doesn't include objc_retainBlock, because it updates
+ // pointers when it copies block data.
+ return NoModRef;
+ default: // Any other call class gets no ARC-specific answer.
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2); // For now, always forwarded.
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
new file mode 100644
index 0000000..7abe995
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -0,0 +1,74 @@
+//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
+/// of Objective C to enhance other optimization passes which rely on the Alias
+/// Analysis infrastructure.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+namespace objcarc {
+
+ /// \brief This is a simple alias analysis implementation that uses knowledge
+ /// of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+ /// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// This method is used when a pass implements an analysis interface through
+ /// multiple inheritance. If needed, it should override this to adjust the
+ /// this pointer as needed for the specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return static_cast<AliasAnalysis *>(this); // Adjust to the AliasAnalysis base.
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+
+} // namespace objcarc
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
new file mode 100644
index 0000000..1c13d1c
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -0,0 +1,537 @@
+//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines late ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file mainly deals with ``contracting'' multiple lower level
+/// operations into singular higher level operations through pattern matching.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#define DEBUG_TYPE "objc-arc-contract"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+ /// \brief Late ARC optimizations
+ ///
+ /// These change the IR in a way that makes it difficult to be analyzed by
+ /// ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed; // Reset per function in runOnFunction; set on any IR change.
+ AliasAnalysis *AA; // Refreshed at the start of each runOnFunction.
+ DominatorTree *DT; // Refreshed at the start of each runOnFunction.
+ ProvenanceAnalysis PA; // Bound to the current AliasAnalysis in runOnFunction.
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for objc_storeStrong().
+ Constant *StoreStrongCallee;
+ /// Declaration for objc_retainAutorelease().
+ Constant *RetainAutoreleaseCallee;
+ /// Declaration for objc_retainAutoreleaseReturnValue().
+ Constant *RetainAutoreleaseRVCallee;
+
+ /// The inline asm string to insert between calls and RetainRV calls to make
+ /// the optimization work on targets which need it. Null if the module has none.
+ const MDString *RetainRVMarker;
+
+ /// The set of inserted objc_storeStrong calls. If at the end of walking the
+ /// function we have found no alloca instructions, these calls can be marked
+ /// "tail".
+ SmallPtrSet<CallInst *, 8> StoreStrongCalls;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract(); // Heap-allocated; handed back as a raw pointer.
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>(); // Fetched in runOnFunction for AA and PA.
+ AU.addRequired<DominatorTree>(); // Fetched in runOnFunction for dominance checks.
+ AU.setPreservesCFG(); // This pass declares that it preserves the CFG.
+}
+
+/// Return the objc_storeStrong() declaration for \p M, obtaining it through
+/// getOrInsertFunction on first use and caching it in StoreStrongCallee.
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+  if (StoreStrongCallee)
+    return StoreStrongCallee;
+
+  // Signature: void (i8**, i8*).
+  LLVMContext &Ctx = M->getContext();
+  Type *Int8PtrTy = PointerType::getUnqual(Type::getInt8Ty(Ctx));
+  Type *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
+  Type *ArgTys[] = { Int8PtrPtrTy, Int8PtrTy };
+  FunctionType *FnTy =
+    FunctionType::get(Type::getVoidTy(Ctx), ArgTys, /*isVarArg=*/false);
+
+  // The function is nounwind, and its first parameter is nocapture.
+  AttributeSet Attrs;
+  Attrs = Attrs.addAttribute(Ctx, AttributeSet::FunctionIndex,
+                             Attribute::NoUnwind);
+  Attrs = Attrs.addAttribute(Ctx, 1, Attribute::NoCapture);
+
+  StoreStrongCallee = M->getOrInsertFunction("objc_storeStrong", FnTy, Attrs);
+  return StoreStrongCallee;
+}
+
+/// Return the objc_retainAutorelease() declaration for \p M, obtaining it
+/// through getOrInsertFunction on first use and caching it in
+/// RetainAutoreleaseCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+  if (RetainAutoreleaseCallee)
+    return RetainAutoreleaseCallee;
+
+  // Signature: i8* (i8*), marked nounwind.
+  LLVMContext &Ctx = M->getContext();
+  Type *Int8PtrTy = PointerType::getUnqual(Type::getInt8Ty(Ctx));
+  Type *ArgTys[] = { Int8PtrTy };
+  FunctionType *FnTy =
+    FunctionType::get(Int8PtrTy, ArgTys, /*isVarArg=*/false);
+  AttributeSet Attrs =
+    AttributeSet().addAttribute(Ctx, AttributeSet::FunctionIndex,
+                                Attribute::NoUnwind);
+
+  RetainAutoreleaseCallee =
+    M->getOrInsertFunction("objc_retainAutorelease", FnTy, Attrs);
+  return RetainAutoreleaseCallee;
+}
+
+/// Return the objc_retainAutoreleaseReturnValue() declaration for \p M,
+/// obtaining it through getOrInsertFunction on first use and caching it in
+/// RetainAutoreleaseRVCallee.
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+  if (RetainAutoreleaseRVCallee)
+    return RetainAutoreleaseRVCallee;
+
+  // Signature: i8* (i8*), marked nounwind.
+  LLVMContext &Ctx = M->getContext();
+  Type *Int8PtrTy = PointerType::getUnqual(Type::getInt8Ty(Ctx));
+  Type *ArgTys[] = { Int8PtrTy };
+  FunctionType *FnTy =
+    FunctionType::get(Int8PtrTy, ArgTys, /*isVarArg=*/false);
+  AttributeSet Attrs =
+    AttributeSet().addAttribute(Ctx, AttributeSet::FunctionIndex,
+                                Attribute::NoUnwind);
+
+  RetainAutoreleaseRVCallee =
+    M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FnTy, Attrs);
+  return RetainAutoreleaseRVCallee;
+}
+
+/// Merge an autorelease with a retain into a fused call.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the autorelease
+ // (such as an autorelease_pop) which may change the count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear(); // Both scratch sets are handed back empty on every path below.
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
+ "retain/autorelease. Erasing: " << *Autorelease << "\n"
+ " Old Retain: "
+ << *Retain << "\n");
+
+ if (Class == IC_AutoreleaseRV) // Retarget the retain at the matching fused call.
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New Retain: "
+ << *Retain << "\n");
+
+ EraseInstruction(Autorelease);
+ return true; // Autorelease has been erased; the caller must not touch it again.
+}
+
+/// Attempt to merge an objc_release with a store, load, and objc_retain to form
+/// an objc_storeStrong. This can be a little tricky because the instructions
+/// don't always appear in order, and there may be unrelated intervening
+/// instructions.
+///
+/// \param Release The objc_release call to start matching from.
+/// \param Iter The caller's instruction iterator. If it points at an
+/// instruction that is about to be erased here, it is advanced past that
+/// instruction first so that the caller can keep iterating.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+                                      inst_iterator &Iter) {
+  LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+  if (!Load || !Load->isSimple()) return;
+
+  // For now, require everything to be in one basic block.
+  BasicBlock *BB = Release->getParent();
+  if (Load->getParent() != BB) return;
+
+  // Walk down to find the store and the release, which may be in either order.
+  BasicBlock::iterator I = Load, End = BB->end();
+  ++I;
+  AliasAnalysis::Location Loc = AA->getLocation(Load);
+  StoreInst *Store = 0;
+  bool SawRelease = false;
+  for (; !Store || !SawRelease; ++I) {
+    if (I == End)
+      return;
+
+    Instruction *Inst = I;
+    if (Inst == Release) {
+      SawRelease = true;
+      continue;
+    }
+
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    // Unrelated retains are harmless.
+    if (IsRetain(Class))
+      continue;
+
+    if (Store) {
+      // The store is the point where we're going to put the objc_storeStrong,
+      // so make sure there are no uses after it.
+      if (CanUse(Inst, Load, PA, Class))
+        return;
+    } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
+      // We are moving the load down to the store, so check for anything
+      // else which writes to the memory between the load and the store.
+      Store = dyn_cast<StoreInst>(Inst);
+      if (!Store || !Store->isSimple()) return;
+      if (Store->getPointerOperand() != Loc.Ptr) return;
+    }
+  }
+
+  Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+  // Walk up to find the retain.
+  I = Store;
+  BasicBlock::iterator Begin = BB->begin();
+  while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+    --I;
+  Instruction *Retain = I;
+  if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+  if (GetObjCArg(Retain) != New) return;
+
+  Changed = true;
+  ++NumStoreStrongs;
+
+  LLVMContext &C = Release->getContext();
+  Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+  Type *I8XX = PointerType::getUnqual(I8X);
+
+  // objc_storeStrong takes (i8**, i8*); cast the operands if they have other
+  // types. All new instructions go immediately before the store.
+  Value *Args[] = { Load->getPointerOperand(), New };
+  if (Args[0]->getType() != I8XX)
+    Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+  if (Args[1]->getType() != I8X)
+    Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+  CallInst *StoreStrong =
+    CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+                     Args, "", Store);
+  StoreStrong->setDoesNotThrow();
+  StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+  // We can't set the tail flag yet, because we haven't yet determined
+  // whether there are any escaping allocas. Remember this call, so that
+  // we can set the tail flag once we know it's safe.
+  StoreStrongCalls.insert(StoreStrong);
+
+  // Step the caller's iterator off of anything we are about to erase that
+  // it could be pointing at. The retain is found by walking up from the
+  // store, so when the store comes after the release the retain may be the
+  // instruction right after the release, which is where Iter can be. The
+  // retain always precedes the store, so check it first.
+  if (&*Iter == Retain) ++Iter;
+  if (&*Iter == Store) ++Iter;
+  Store->eraseFromParent();
+  Release->eraseFromParent();
+  EraseInstruction(Retain);
+  if (Load->use_empty())
+    Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ // If nothing in the Module uses ARC, don't do anything.
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) { // Accept only one node holding one MDString.
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false; // Only pass state was set up here.
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they are setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs = !F.isVarArg() &&
+ !F.callsFunctionThatReturnsTwice();
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++; // Advance first: Inst may be erased below.
+
+ DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ BasicBlock *InstParent = Inst->getParent();
+
+ // Step up to see if the call immediately precedes the RetainRV call.
+ // If it's an invoke, we have to cross a block boundary. And we have
+ // to carefully dodge no-op instructions.
+ do {
+ if (&*BBI == InstParent->begin()) {
+ BasicBlock *Pred = InstParent->getSinglePredecessor();
+ if (!Pred)
+ goto decline_rv_optimization;
+ BBI = Pred->getTerminator();
+ break;
+ }
+ --BBI;
+ } while (isNoopInstruction(BBI));
+
+ if (&*BBI == GetObjCArg(Inst)) {
+ DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
+ "retainAutoreleasedReturnValue optimization.\n");
+ Changed = true;
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ decline_rv_optimization:
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+
+ DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
+ << " New = " << *Null << "\n");
+
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ case IC_User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ continue;
+ default:
+ continue;
+ }
+
+ DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // Stop unless Arg is an instruction or argument (e.g. bugpointed code).
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+
+ // If the call's return value dominates a use of the call's argument
+ // value, rewrite the use to use the return value. We check for
+ // reachability here because an unreachable call is considered to
+ // trivially dominate itself, which would lead us to rewriting its
+ // argument in terms of its return value, which would lead to
+ // infinite loops in GetObjCArg.
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB = PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ // While we're here, rewrite all edges for this PHI, rather
+ // than just one use at a time, to minimize the number of
+ // bitcasts we emit.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ // If this function has no escaping allocas or suspicious vararg usage,
+ // objc_storeStrong calls can be marked with the "tail" keyword.
+ if (TailOkForStoreStrongs)
+ for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
+ E = StoreStrongCalls.end(); I != E; ++I)
+ (*I)->setTailCall();
+ StoreStrongCalls.clear(); // Emptied whether or not the calls were marked.
+
+ return Changed;
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
new file mode 100644
index 0000000..39bf8f3
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -0,0 +1,128 @@
+//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// This specific file deals with early optimizations which perform certain
+/// cleanup operations.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-expand"
+
+#include "ObjCARC.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class Module;
+}
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+namespace {
+ /// \brief Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run; // Set by doInitialization from ModuleHasARC(M).
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand(); // Heap-allocated; handed back as a raw pointer.
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG(); // This pass declares that it preserves the CFG.
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M); // Cached once per module; checked in runOnFunction.
+ return false; // Only pass state was set up here.
+}
+
+/// Undo the front-end's use of ARC call return values in \p F: every use of a
+/// call that returns its argument is redirected to the argument itself.
+///
+/// \returns true if any such call was encountered.
+bool ObjCARCExpand::runOnFunction(Function &F) {
+  // Bail out if ARC optimizations are disabled, or if nothing in the Module
+  // uses ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  bool Changed = false;
+
+  DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n");
+
+  for (inst_iterator It = inst_begin(&F), End = inst_end(&F); It != End; ++It) {
+    Instruction *Inst = &*It;
+
+    DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
+
+    // Only the call classes listed here are of interest; move on to the next
+    // instruction for everything else.
+    switch (GetBasicInstructionClass(Inst)) {
+    case IC_Retain:
+    case IC_RetainRV:
+    case IC_Autorelease:
+    case IC_AutoreleaseRV:
+    case IC_FusedRetainAutorelease:
+    case IC_FusedRetainAutoreleaseRV:
+      break;
+    default:
+      continue;
+    }
+
+    // These calls return their argument verbatim, as a low-level
+    // optimization. However, this makes high-level optimizations
+    // harder. Undo any uses of this optimization that the front-end
+    // emitted here. We'll redo them in the contract pass.
+    Changed = true;
+    Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+    DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
+                    " New = " << *Arg << "\n");
+    Inst->replaceAllUsesWith(Arg);
+  }
+
+  DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
+
+  return Changed;
+}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
new file mode 100644
index 0000000..9c14949
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -0,0 +1,2691 @@
+//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines ObjC ARC optimizations. ARC stands for Automatic
+/// Reference Counting and is a system for managing reference counts for objects
+/// in Objective C.
+///
+/// The optimizations performed include elimination of redundant, partially
+/// redundant, and inconsequential reference count operations, elimination of
+/// redundant weak pointer operations, and numerous minor simplifications.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc-opts"
+#include "ObjCARC.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARCAliasAnalysis.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
+/// @{
+
+namespace {
+ /// \brief An associative container with fast insertion-order (deterministic)
+ /// iteration over its elements, plus the special blot operation (see blot()).
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ /// Keys and values, in insertion order.
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() { // Consistency check between Map and Vector, XDEBUG builds only.
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) { // Find-or-insert with a default-constructed value.
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0))); // 0 is a placeholder index.
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num; // Fix up the placeholder with the real index.
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool> // Iterator to the entry, and whether it was newly inserted.
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(const KeyT &Key) const { // Returns end() when Key is absent.
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT(); // The value stays; only the key is reset.
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
+
+/// @}
+///
+/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
+/// @{
+
+/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon
+/// as it finds a value with multiple uses. Returns 0 if no object is found.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) { // Look through bitcasts, all-zero GEPs and forwarding calls.
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object that has multiple uses, but they are
+ // all trivial uses, we can still consider this to be a single-use value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) // Not a trivial use.
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// \brief Test whether the given retainable object pointer escapes.
+///
+/// This differs from regular escape analysis in that a use as an
+/// argument to a call is not considered an escape.
+///
+static bool DoesRetainableObjPtrEscape(const User *Ptr) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n");
+
+ // Walk the def-use chains.
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Ptr);
+ // If Ptr has any operands add them as well.
+ for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
+ ++I) {
+ Worklist.push_back(*I);
+ }
+
+ // Ensure we do not queue any value-copying user twice.
+ SmallPtrSet<const Value *, 8> VisitedSet;
+
+ do {
+ const Value *V = Worklist.pop_back_val();
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const User *UUser = *UI;
+
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+
+ // Special - Use by a call (callee or argument) is not considered
+ // to be an escape.
+ switch (GetBasicInstructionClass(UUser)) {
+ case IC_StoreWeak:
+ case IC_InitWeak:
+ case IC_StoreStrong:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV: {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
+ "arguments. Pointer Escapes!\n");
+ // These special functions make copies of their pointer arguments.
+ return true;
+ }
+ case IC_User:
+ case IC_None:
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+
+ if (VisitedSet.insert(UUser)) {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
+ "Ptr escapes if result escapes. Adding to list.\n");
+ Worklist.push_back(UUser);
+ } else {
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
+ "\n");
+ }
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ break; // Leave the switch and report an escape below.
+ default:
+ // Regular calls and other stuff are not considered escapes.
+ continue;
+ }
+ // Otherwise, conservatively assume an escape.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ return true;
+ }
+ } while (!Worklist.empty());
+
+ // No escapes found.
+ DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ return false;
+}
+
+/// @}
+///
+/// \defgroup ARCOpt ARC Optimization.
+/// @{
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); // Counters for this pass.
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// \enum Sequence
+ ///
+ /// \brief A sequence of states that a pointer may go through in which an
+ /// objc_retain and objc_release are actually needed.
+ enum Sequence { // Enumerator order matters: MergeSeqs compares values with >.
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+ };
+
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
+ LLVM_ATTRIBUTE_UNUSED; // Separate declaration carrying the unused attribute.
+ raw_ostream &operator<<(raw_ostream &OS, const Sequence S) { // Prints the enumerator name.
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+ }
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { // Combine two sequence states.
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B) std::swap(A, B); // Normalize so that A precedes B in enum order.
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the (reverse) sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None; // Any other combination cannot be merged.
+}
+
+namespace {
+ /// \brief Unidirectional information about either a
+ /// retain-decrement-use-release sequence or release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
+
+ /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
+ /// calls).
+ bool IsRetainBlock;
+
+ /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownSafe(false), IsRetainBlock(false),
+ IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() { // Restore every field to its default-constructed value.
+ KnownSafe = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// \brief This class summarizes several per-pointer runtime properties which
+ /// are propagated through the flow graph.
+ class PtrState {
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ Sequence Seq : 8;
+
+ public:
+ /// Unidirectional information about the current sequence.
+ ///
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false),
+ Seq(S_None) {}
+
+ void SetKnownPositiveRefCount() {
+ KnownPositiveRefCount = true;
+ }
+
+ void ClearRefCount() {
+ KnownPositiveRefCount = false;
+ }
+
+ bool IsKnownIncremented() const { // Reads the KnownPositiveRefCount flag.
+ return KnownPositiveRefCount;
+ }
+
+ void SetSeq(Sequence NewSeq) { // Changes only Seq; Partial and RRI are kept.
+ Seq = NewSeq;
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ ResetSequenceProgress(S_None);
+ }
+
+ void ResetSequenceProgress(Sequence NewSeq) { // Unlike SetSeq, also drops Partial and RRI.
+ Seq = NewSeq;
+ Partial = false;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) { // Fold Other's state into this one.
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+
+ // We can't merge a plain objc_retain with an objc_retainBlock.
+ if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+ Seq = S_None;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+ Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ Partial |= RRI.ReverseInsertPts.insert(*I); // insert() is true for a new element.
+ }
+}
+
+namespace {
+ /// \brief Per-BasicBlock state.
+ class BBState {
+ /// The number of unique control paths from the entry which can reach this
+ /// block.
+ unsigned TopDownPathCount;
+
+ /// The number of unique control paths to exits from this block.
+ unsigned BottomUpPathCount;
+
+ /// A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// The top-down traversal uses this to record information known about a
+ /// pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// The bottom-up traversal uses this to record information known about a
+ /// pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ /// Effective predecessors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Preds;
+ /// Effective successors of the current block ignoring ignorable edges and
+ /// ignored backedges.
+ SmallVector<BasicBlock *, 2> Succs;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// Mark this block as being an entry block, which has one path from the
+ /// entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// Mark this block as being an exit block, which has one path to an exit by
+ /// definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) { // Creates a default entry if absent.
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) { // Creates a default entry if absent.
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// Return the number of possible unique paths from an entry to an exit
+ /// which pass through this block. This is only valid after both the
+ /// top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ assert(TopDownPathCount != 0);
+ assert(BottomUpPathCount != 0);
+ return TopDownPathCount * BottomUpPathCount; // NOTE(review): unsigned product can wrap.
+ }
+
+ // Specialized CFG utilities.
+ typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
+ edge_iterator pred_begin() { return Preds.begin(); }
+ edge_iterator pred_end() { return Preds.end(); }
+ edge_iterator succ_begin() { return Succs.begin(); }
+ edge_iterator succ_end() { return Succs.end(); }
+
+ void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
+ void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
+
+ bool isExit() const { return Succs.empty(); } // No recorded effective successors.
+ };
+}
+
+void BBState::InitFromPred(const BBState &Other) { // Copy Other's top-down states and count.
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) { // Copy Other's bottom-up states and count.
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// The top-down traversal uses this to merge information about predecessors to
+/// form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (TopDownPathCount < Other.TopDownPathCount) { // The unsigned sum wrapped around.
+ clearTopDownPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// The bottom-up traversal uses this to merge information about successors to
+/// form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // Check for overflow. If we have overflow, fall back to conservative
+ // behavior.
+ if (BottomUpPathCount < Other.BottomUpPathCount) { // The unsigned sum wrapped around.
+ clearBottomUpPointers();
+ return;
+ }
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+ /// \brief The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// Declarations for ObjC runtime functions, for use in creating calls to
+ /// them. These are initialized lazily to avoid cluttering up the Module
+ /// with unused declarations.
+
+ /// Declaration for ObjC runtime function
+ /// objc_retainAutoreleasedReturnValue.
+ Constant *RetainRVCallee;
+ /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+ Constant *AutoreleaseRVCallee;
+ /// Declaration for ObjC runtime function objc_release.
+ Constant *ReleaseCallee;
+ /// Declaration for ObjC runtime function objc_retain.
+ Constant *RetainCallee;
+ /// Declaration for ObjC runtime function objc_retainBlock.
+ Constant *RetainBlockCallee;
+ /// Declaration for ObjC runtime function objc_autorelease.
+ Constant *AutoreleaseCallee;
+
+ /// Flags which determine whether each of the interesting runtime functions
+ /// is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ unsigned CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ unsigned NoObjCARCExceptionsMDKind;
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getRetainBlockCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ bool IsRetainBlockOptimizable(const Instruction *Inst);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ Module *M);
+
+ bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M,
+ SmallVector<Instruction *, 4> &NewRetains,
+ SmallVector<Instruction *, 4> &NewReleases,
+ SmallVector<Instruction *, 8> &DeadInsts,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ Value *Arg,
+ bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ Module *M);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0; // Definition of the static member declared in the class.
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() { // Factory for the ObjCARCOpt pass.
+ return new ObjCARCOpt(); // Heap-allocated; the caller receives the raw pointer.
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>(); // Also named in INITIALIZE_PASS_DEPENDENCY.
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+ // Without the magic metadata tag, we have to assume this might be an
+ // objc_retainBlock call inserted to convert a block pointer to an id,
+ // in which case it really is needed.
+ if (!Inst->getMetadata(CopyOnEscapeMDKind))
+ return false;
+
+ // If the pointer "escapes" (not including being used in a call),
+ // the copy may be needed.
+ if (DoesRetainableObjPtrEscape(Inst))
+ return false;
+
+ // Otherwise, the copy is not needed: the tag is present and nothing escapes.
+ return true;
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); // i8* (i8*)
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); // i8* (i8*)
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attribute);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), // void (i8*)
+ Attribute);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false), // i8* (i8*)
+ Attribute);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+ if (!RetainBlockCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ // objc_retainBlock is not nounwind because it calls user copy constructors
+ // which could theoretically throw, so an empty AttributeSet is passed.
+ RetainBlockCallee =
+ M->getOrInsertFunction(
+ "objc_retainBlock",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false), // i8* (i8*)
+ AttributeSet());
+ }
+ return RetainBlockCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) { // Create the declaration on first use and cache it.
+ LLVMContext &C = M->getContext();
+ Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false), // i8* (i8*)
+ Attribute);
+ }
+ return AutoreleaseCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value of a call in the same block that immediately precedes Retain.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call) return; // No call-site instruction behind the retained operand.
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain, skipping no-op instructions.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
+ "objc_retain => objc_retainAutoreleasedReturnValue"
+ " since the operand is a return value.\n"
+ " Old: "
+ << *Retain << "\n");
+
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *Retain << "\n");
+}
+
+/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
+/// not a return value. Or, if it can be paired with an
+/// objc_autoreleaseReturnValue, delete the pair and return true. Every other path returns false.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call or invoke.
+ const Value *Arg = GetObjCArg(RetainRV);
+ ImmutableCallSite CS(Arg);
+ if (const Instruction *Call = CS.getInstruction()) {
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I; // Skip whatever isNoopInstruction accepts.
+ if (&*I == RetainRV)
+ return false; // RetainRV directly follows its call: leave it unchanged.
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::const_iterator I = RetainRVParent->begin();
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false; // RetainRV leads the invoke's normal destination: leave it unchanged.
+ }
+ }
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && isNoopInstruction(I)); // Step back to the nearest earlier non-no-op, stopping at the block start.
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
+ << " Erasing " << *RetainRV
+ << "\n");
+
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true; // RetainRV no longer exists; the caller must not touch it.
+ }
+ }
+
+ // Turn it to a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
+ "objc_retainAutoreleasedReturnValue => "
+ "objc_retain since the operand is not a return value.\n"
+ " Old: "
+ << *RetainRV << "\n");
+
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+
+ DEBUG(dbgs() << " New: "
+ << *RetainRV << "\n");
+
+ return false; // The instruction was rewritten in place, not erased.
+}
+
+/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
+/// used as a return value. On a rewrite, \p Class is updated to IC_Autorelease; otherwise it is left as passed in.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+ InstructionClass &Class) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ SmallVector<const Value *, 2> Users; // Worklist: the pointer plus any bitcasts of it found below.
+ Users.push_back(Ptr);
+ do {
+ Ptr = Users.pop_back_val();
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return; // Used by a return or by a retainRV call: keep the RV form.
+ if (isa<BitCastInst>(I))
+ Users.push_back(I); // Look through bitcasts; other kinds of user are not followed.
+ }
+ } while (!Users.empty());
+
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
+ "objc_autoreleaseReturnValue => "
+ "objc_autorelease since its operand is not used as a return "
+ "value.\n"
+ " Old: "
+ << *AutoreleaseRV << "\n");
+
+ CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
+ AutoreleaseRVCI->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+ AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
+ Class = IC_Autorelease; // Report the new class back to the caller through the reference.
+
+ DEBUG(dbgs() << " New: "
+ << *AutoreleaseRV << "\n");
+
+}
+
+/// Visit each call, one at a time, and make simplifications without doing any
+/// additional analysis. UsedInThisFunction is reset and rebuilt as a bitmask of the instruction classes that remain.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++; // Advance before processing: Inst may be erased below.
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+ << Class << "; " << *Inst << "\n");
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions,
+ // however these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
+ " " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0))) {
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI); // Store of undef through a null pointer, inserted before CI; presumably a marker for the undefined behavior.
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0)) ||
+ isNullOrUndef(CI->getArgOperand(1))) { // These two classes take two pointer operands; either being null/undef triggers the rewrite.
+ Changed = true;
+ Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+
+ llvm::Value *NewValue = UndefValue::get(CI->getType());
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
+ "pointer-to-weak-pointer is undefined behavior.\n"
+ " Old = " << *CI <<
+ "\n New = " <<
+ *NewValue << "\n");
+
+ CI->replaceAllUsesWith(NewValue);
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue; // A true result means Inst was erased.
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst, Class); // May change Class to IC_Autorelease.
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>())); // Tag the new release with an empty metadata node of the imprecise-release kind.
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
+ "objc_autorelease(x) with objc_release(x) since x is "
+ "otherwise unused.\n"
+ " Old: " << *Call <<
+ "\n New: " <<
+ *NewCall << "\n");
+
+ EraseInstruction(Call);
+ Inst = NewCall; // The rest of this iteration operates on the replacement call.
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
+ " to function since it can never be passed stack args: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Ensure that functions that can never have a "tail" keyword due to the
+ // semantics of ARC truly do not do so.
+ if (IsNeverTail(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
+ "keyword from function: " << *Inst <<
+ "\n");
+ cast<CallInst>(Inst)->setTailCall(false);
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
+ " class. Setting nounwind on: " << *Inst << "\n");
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class; // Record the class; the null-argument handling below does not apply to it.
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (isNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
+ " null are no-ops. Erasing: " << *Inst << "\n");
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; // (call, stripped argument) pairs still to examine.
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue; // In a do-while, 'continue' jumps to the loop condition, i.e. on to the next worklist entry.
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (isNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) { // Only predecessors that supply a non-null value are checked for multiple successors.
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain
+ // count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
+ // NOTE(review): for IC_Retain / IC_RetainBlock nothing fills DependingInstructions above, so the size() == 1 test below cannot hold for them — confirm this is intended.
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!isNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); // Insert in front of the predecessor's last instruction.
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ << *CInst << "\n"
+ " And inserting "
+ "clone at " << *InsertPos << "\n");
+ Worklist.push_back(std::make_pair(Clone, Incoming)); // The clone's operand may itself be a PHI; revisit it.
+ }
+ }
+ // Erase the original call.
+ DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// Check for critical edges, loop boundaries, irreducible control flow, or
+/// other CFG structures where moving code across the edge would result in it
+/// being executed more. Offending top-down pointer states in \p MyStates have their sequence progress cleared.
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None:
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break; // Leaves only the inner switch; the successor loop keeps running.
+ }
+ continue; // Next successor.
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: { // Same procedure as S_Use, except that a successor in S_Use counts as a mismatch and one in S_CanRelease as a match.
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ switch (SuccSSeq) {
+ case S_None: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break; // Leaves only the inner switch; the successor loop keeps running.
+ }
+ continue; // Next successor.
+ }
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ break;
+ }
+ }
+}
+/**
+ * Update the bottom-up pointer states in \p MyStates for one instruction.
+ *
+ * - IC_Release restarts the sequence for its argument as S_MovableRelease when
+ *   the call carries metadata of kind ImpreciseReleaseMDKind, else S_Release.
+ * - IC_Retain, IC_RetainRV and optimizable IC_RetainBlock calls end the
+ *   sequence for their argument. Unless the state is S_None, the accumulated
+ *   RRInfo is saved in \p Retains (never for IC_RetainRV) and the function
+ *   returns straight away.
+ * - IC_AutoreleasepoolPop clears every bottom-up pointer state.
+ * - Any instruction that reaches the generic loop is checked against every
+ *   other tracked pointer with CanAlterRefCount and CanUse.
+ *
+ * \p BB supplies the insertion point when \p Inst is an invoke.
+ * Returns true if a release was seen while its pointer was already in a
+ * release state (nested releases).
+ */
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0; // Stays null unless Inst is one of the retain/release classes handled below.
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // If we see two releases in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
+ "releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.SetKnownPositiveRefCount();
+ break; // Continue to the generic loop below, which skips Arg itself.
+ }
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.SetKnownPositiveRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI; // Copy the release-side info gathered so far, keyed by this retain.
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ return NestingDetected; // Retains skip the generic per-pointer loop.
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq(); // Snapshot: both switches below test the state as it was on entry.
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); // The instruction right after Inst.
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+/// Run the bottom-up step of the dataflow analysis for \p BB.
+///
+/// The block's initial bottom-up state is built from the states already
+/// stored in \p BBStates for its recorded successors. Every non-invoke
+/// instruction is then visited from last to first, and finally any invoke that
+/// terminates a recorded predecessor is visited as if it belonged to \p BB.
+///
+/// \param BB       The block to process.
+/// \param BBStates Per-block states; every recorded successor of \p BB must
+///                 already have an entry (asserted).
+/// \param Retains  Forwarded to VisitInstructionBottomUp.
+/// \returns true if any visited instruction reported nested releases.
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+                          DenseMap<const BasicBlock *, BBState> &BBStates,
+                          MapVector<Value *, RRInfo> &Retains) {
+  bool NestingDetected = false;
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each successor to compute the initial state
+  // for the current block.
+  BBState::edge_iterator SI(MyStates.succ_begin()),
+                         SE(MyStates.succ_end());
+  if (SI != SE) {
+    const BasicBlock *Succ = *SI;
+    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+    assert(I != BBStates.end());
+    // The first successor seeds the state; the remaining ones are merged in.
+    MyStates.InitFromSucc(I->second);
+    ++SI;
+    for (; SI != SE; ++SI) {
+      Succ = *SI;
+      I = BBStates.find(Succ);
+      assert(I != BBStates.end());
+      MyStates.MergeSucc(I->second);
+    }
+  }
+
+  // Visit all the instructions, bottom-up.
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+    Instruction *Inst = llvm::prior(I);
+
+    // Invoke instructions are visited as part of their successors (below).
+    if (isa<InvokeInst>(Inst))
+      continue;
+
+    // The tag used to read "VisitButtonUp", which made the output impossible
+    // to find by searching for the function name.
+    DEBUG(dbgs() << "ObjCARCOpt::VisitBottomUp: Visiting " << *Inst << "\n");
+
+    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+  }
+
+  // If there's a predecessor with an invoke, visit the invoke as if it were
+  // part of this block, since we can't insert code after an invoke in its own
+  // block, and we don't want to split critical edges.
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+  }
+
+  return NestingDetected;
+}
+/**
+ * Update the top-down pointer states in \p MyStates for one instruction.
+ *
+ * - IC_Retain and optimizable IC_RetainBlock calls restart the sequence for
+ *   their argument in S_Retain. IC_RetainRV only marks the reference count as
+ *   known positive. All three then continue to the generic loop.
+ * - IC_Release ends the sequence for its argument. In the S_Retain,
+ *   S_CanRelease and S_Use states the accumulated RRInfo is saved in
+ *   \p Releases.
+ * - IC_AutoreleasepoolPop clears every top-down pointer state.
+ * - Any instruction that reaches the generic loop is checked against every
+ *   other tracked pointer with CanAlterRefCount and CanUse.
+ *
+ * Returns true if a retain was seen while its pointer was already in the
+ * S_Retain state (nested retains).
+ */
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0; // Stays null unless Inst is one of the retain/release classes handled below.
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer. If so, make
+ // a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ S.ResetSequenceProgress(S_Retain);
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownSafe = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.SetKnownPositiveRefCount();
+
+ // A retain can be a potential use; proceed to the generic checking
+ // code below.
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.ClearRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI; // Copy the retain-side info gathered so far, keyed by this release.
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq(); // Snapshot: both switches below test the state as it was on entry.
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.ClearRefCount();
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst); // Inst itself is recorded as the insertion point.
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ return NestingDetected;
+}
+
+/// Run the top-down step of the dataflow analysis for \p BB.
+///
+/// The block's initial top-down state is built from the states stored in
+/// \p BBStates for its recorded predecessors, every instruction is visited in
+/// program order, and CheckForCFGHazards is applied to the result.
+///
+/// \returns true if any visited instruction reported nested retains.
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+                         DenseMap<const BasicBlock *, BBState> &BBStates,
+                         DenseMap<Value *, RRInfo> &Releases) {
+  BBState &MyStates = BBStates[BB];
+
+  // Build the entry state: the first predecessor seeds it and every further
+  // predecessor is merged into it.
+  bool SeededFromPred = false;
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    const BasicBlock *Pred = *PI;
+    DenseMap<const BasicBlock *, BBState>::iterator Found =
+      BBStates.find(Pred);
+    assert(Found != BBStates.end());
+    if (SeededFromPred) {
+      MyStates.MergePred(Found->second);
+    } else {
+      MyStates.InitFromPred(Found->second);
+      SeededFromPred = true;
+    }
+  }
+
+  // Walk the block's instructions in program order.
+  bool NestingDetected = false;
+  for (BasicBlock::iterator It = BB->begin(), End = BB->end();
+       It != End; ++It) {
+    Instruction *Inst = &*It;
+
+    DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+
+    if (VisitInstructionTopDown(Inst, Releases, MyStates))
+      NestingDetected = true;
+  }
+
+  CheckForCFGHazards(BB, BBStates, MyStates);
+  return NestingDetected;
+}
+/**
+ * Fill \p PostOrder and \p ReverseCFGPostOrder for \p F and record the CFG
+ * edges the analysis will follow in \p BBStates.
+ *
+ * Phase 1 is an iterative depth-first walk from the entry block. An edge is
+ * recorded with addSucc/addPred unless its target was already visited and is
+ * still on the DFS stack. A block is appended to \p PostOrder once all of its
+ * successors have been processed.
+ *
+ * Phase 2 walks the recorded predecessor edges depth-first from every block
+ * whose BBState reports isExit(), appending finished blocks to
+ * \p ReverseCFGPostOrder.
+ */
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
+ unsigned NoObjCARCExceptionsMDKind, // NOTE(review): not referenced anywhere in this body.
+ DenseMap<const BasicBlock *, BBState> &BBStates) {
+ /// The visited set, for doing DFS walks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack; // Blocks currently on the DFS stack.
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; // Explicit stack: block plus its next successor to explore.
+
+ // Functions always have exactly one entry block, and we don't have
+ // any other block that we treat like an entry block.
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ BBState &MyStates = BBStates[EntryBB];
+ MyStates.SetAsEntry();
+ TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
+ SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ BasicBlock *CurrBB = SuccStack.back().first;
+ TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
+ succ_iterator SE(TI, false);
+
+ while (SuccStack.back().second != SE) {
+ BasicBlock *SuccBB = *SuccStack.back().second++; // Post-increment so this successor is not revisited after the descent returns.
+ if (Visited.insert(SuccBB)) {
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
+ SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBState &SuccStates = BBStates[SuccBB];
+ SuccStates.addPred(CurrBB);
+ OnStack.insert(SuccBB);
+ goto dfs_next_succ; // Descend into the newly discovered block.
+ }
+
+ if (!OnStack.count(SuccBB)) { // Already-visited target: record the edge only if the target is not on the DFS stack.
+ BBStates[CurrBB].addSucc(SuccBB);
+ BBStates[SuccBB].addPred(CurrBB);
+ }
+ }
+ OnStack.erase(CurrBB);
+ PostOrder.push_back(CurrBB);
+ SuccStack.pop_back();
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ // Functions may have many exits, and there also blocks which we treat
+ // as exits due to ignored edges.
+ SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *ExitBB = I;
+ BBState &MyStates = BBStates[ExitBB];
+ if (!MyStates.isExit()) // presumably true when no successor edge was recorded for the block — verify against BBState.
+ continue;
+
+ MyStates.SetAsExit();
+
+ PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+ while (PredStack.back().second != PE) {
+ BasicBlock *BB = *PredStack.back().second++;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+ goto reverse_dfs_next_succ; // Descend into the newly discovered predecessor.
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
+ }
+}
+
+/// Visit the function both top-down and bottom-up.
+///
+/// Block orderings come from ComputePostOrders. The bottom-up pass consumes
+/// the reverse-CFG postorder back to front and the top-down pass does the same
+/// with the forward postorder.
+///
+/// \returns true only if both passes reported nesting.
+bool
+ObjCARCOpt::Visit(Function &F,
+                  DenseMap<const BasicBlock *, BBState> &BBStates,
+                  MapVector<Value *, RRInfo> &Retains,
+                  DenseMap<Value *, RRInfo> &Releases) {
+
+  // Reverse-postorder traversals are used because we magically know that
+  // loops will be well behaved, i.e. they won't repeatedly call retain on a
+  // single pointer without doing a release. The ReversePostOrderTraversal
+  // class is not usable here: the reverse-CFG postorder has to consider each
+  // function exit point, and selected cycle edges have to be ignored.
+  SmallVector<BasicBlock *, 16> ForwardPO;
+  SmallVector<BasicBlock *, 16> BackwardPO;
+  ComputePostOrders(F, ForwardPO, BackwardPO,
+                    NoObjCARCExceptionsMDKind,
+                    BBStates);
+
+  // Bottom-up: reverse-postorder on the reverse CFG.
+  bool NestedBottomUp = false;
+  for (unsigned Idx = BackwardPO.size(); Idx != 0; --Idx)
+    if (VisitBottomUp(BackwardPO[Idx - 1], BBStates, Retains))
+      NestedBottomUp = true;
+
+  // Top-down: reverse-postorder on the forward CFG.
+  bool NestedTopDown = false;
+  for (unsigned Idx = ForwardPO.size(); Idx != 0; --Idx)
+    if (VisitTopDown(ForwardPO[Idx - 1], BBStates, Releases))
+      NestedTopDown = true;
+
+  return NestedTopDown && NestedBottomUp;
+}
+
+/// Move the calls in RetainsToMove and ReleasesToMove.
+///
+/// The insertion-point sets are "reverse": new retain calls (objc_retain, or
+/// objc_retainBlock when RetainsToMove.IsRetainBlock is set) are created at
+/// the points stored in ReleasesToMove.ReverseInsertPts, and new objc_release
+/// calls at the points stored in RetainsToMove.ReverseInsertPts. \p Arg is
+/// bitcast to i8* at each insertion point when its type differs.
+///
+/// The original calls listed in the two Calls sets are removed from
+/// \p Retains (blot) and \p Releases (erase) and appended to \p DeadInsts;
+/// they are not erased from the IR here.
+///
+/// \param Arg            The pointer the moved calls operate on.
+/// \param RetainsToMove  Retain-side info: original calls, IsRetainBlock, and
+///                       the points at which new releases go.
+/// \param ReleasesToMove Release-side info: original calls, release metadata,
+///                       tail-call flag, and the points at which new retains
+///                       go.
+/// \param M              Module used to look up the runtime declarations.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+                           RRInfo &RetainsToMove,
+                           RRInfo &ReleasesToMove,
+                           MapVector<Value *, RRInfo> &Retains,
+                           DenseMap<Value *, RRInfo> &Releases,
+                           SmallVectorImpl<Instruction *> &DeadInsts,
+                           Module *M) {
+  Type *ArgTy = Arg->getType();
+  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+
+  // Insert the new retain and release calls.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = ReleasesToMove.ReverseInsertPts.begin(),
+       PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call =
+      CallInst::Create(RetainsToMove.IsRetainBlock ?
+                         getRetainBlockCallee(M) : getRetainCallee(M),
+                       MyArg, "", InsertPt);
+    Call->setDoesNotThrow();
+    if (RetainsToMove.IsRetainBlock)
+      Call->setMetadata(CopyOnEscapeMDKind,
+                        MDNode::get(M->getContext(), ArrayRef<Value *>()));
+    else
+      Call->setTailCall();
+
+    // This loop creates retains; the message used to say "Release".
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = RetainsToMove.ReverseInsertPts.begin(),
+       PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+                                      "", InsertPt);
+    // Attach a clang.imprecise_release metadata tag, if appropriate.
+    // (Named MD so it does not shadow the Module parameter M.)
+    if (MDNode *MD = ReleasesToMove.ReleaseMetadata)
+      Call->setMetadata(ImpreciseReleaseMDKind, MD);
+    Call->setDoesNotThrow();
+    if (ReleasesToMove.IsTailCallRelease)
+      Call->setTailCall();
+
+    // This loop creates releases; the message used to say "Retain".
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+
+  // Delete the original retain and release calls.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = RetainsToMove.Calls.begin(),
+       AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRetain = *AI;
+    Retains.blot(OrigRetain);
+    DeadInsts.push_back(OrigRetain);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
+                    "\n");
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = ReleasesToMove.Calls.begin(),
+       AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRelease = *AI;
+    Releases.erase(OrigRelease);
+    DeadInsts.push_back(OrigRelease);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
+                 << "\n");
+  }
+}
+
+/// Connect the top-down-collected retains with the bottom-up-collected
+/// releases, starting from the retains seeded in NewRetains, and accumulate
+/// the resulting set of related calls in RetainsToMove / ReleasesToMove.
+///
+/// \param NewRetains Worklist of retains still to be processed; the caller
+///        seeds it with the starting retain.
+/// \param NewReleases Worklist of releases still to be processed.
+/// \param KnownSafe When true, no reverse insertion points are collected.
+/// \param [out] AnyPairsCompletelyEliminated Written only when this function
+///        returns true; set to whether no new insertion points are needed.
+/// \returns true if the collected calls can be moved/deleted, false if the
+///          sets could not be connected or the path counts do not balance.
+///
+/// M, DeadInsts and Arg are not referenced in the body.
+bool
+ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
+                                    &BBStates,
+                                  MapVector<Value *, RRInfo> &Retains,
+                                  DenseMap<Value *, RRInfo> &Releases,
+                                  Module *M,
+                                  SmallVector<Instruction *, 4> &NewRetains,
+                                  SmallVector<Instruction *, 4> &NewReleases,
+                                  SmallVector<Instruction *, 8> &DeadInsts,
+                                  RRInfo &RetainsToMove,
+                                  RRInfo &ReleasesToMove,
+                                  Value *Arg,
+                                  bool KnownSafe,
+                                  bool &AnyPairsCompletelyEliminated) {
+  // If a pair happens in a region where it is known that the reference count
+  // is already incremented, we can similarly ignore possible decrements.
+  bool KnownSafeTD = true, KnownSafeBU = true;
+
+  // Connect the dots between the top-down-collected RetainsToMove and
+  // bottom-up-collected ReleasesToMove to form sets of related calls.
+  // This is an iterative process so that we connect multiple releases
+  // to multiple retains if needed.
+  //
+  // The deltas are unsigned: releases subtract and retains add, and only
+  // equality with zero is tested at the end, so wraparound is harmless.
+  unsigned OldDelta = 0;
+  unsigned NewDelta = 0;
+  unsigned OldCount = 0;
+  unsigned NewCount = 0;
+  bool FirstRelease = true;
+  bool FirstRetain = true;
+  for (;;) {
+    for (SmallVectorImpl<Instruction *>::const_iterator
+         NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+      Instruction *NewRetain = *NI;
+      MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+      assert(It != Retains.end());
+      const RRInfo &NewRetainRRI = It->second;
+      KnownSafeTD &= NewRetainRRI.KnownSafe;
+      for (SmallPtrSet<Instruction *, 2>::const_iterator
+           LI = NewRetainRRI.Calls.begin(),
+           LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewRetainRelease = *LI;
+        DenseMap<Value *, RRInfo>::const_iterator Jt =
+          Releases.find(NewRetainRelease);
+        if (Jt == Releases.end())
+          return false;
+        const RRInfo &NewRetainReleaseRRI = Jt->second;
+        assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+        if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+          OldDelta -=
+            BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+          // Merge the ReleaseMetadata and IsTailCallRelease values.
+          if (FirstRelease) {
+            ReleasesToMove.ReleaseMetadata =
+              NewRetainReleaseRRI.ReleaseMetadata;
+            ReleasesToMove.IsTailCallRelease =
+              NewRetainReleaseRRI.IsTailCallRelease;
+            FirstRelease = false;
+          } else {
+            if (ReleasesToMove.ReleaseMetadata !=
+                  NewRetainReleaseRRI.ReleaseMetadata)
+              ReleasesToMove.ReleaseMetadata = 0;
+            if (ReleasesToMove.IsTailCallRelease !=
+                  NewRetainReleaseRRI.IsTailCallRelease)
+              ReleasesToMove.IsTailCallRelease = false;
+          }
+
+          // Collect the optimal insertion points.
+          if (!KnownSafe)
+            for (SmallPtrSet<Instruction *, 2>::const_iterator
+                 RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                 RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                 RI != RE; ++RI) {
+              Instruction *RIP = *RI;
+              if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+            }
+          NewReleases.push_back(NewRetainRelease);
+        }
+      }
+    }
+    NewRetains.clear();
+    if (NewReleases.empty()) break;
+
+    // Back the other way.
+    for (SmallVectorImpl<Instruction *>::const_iterator
+         NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+      Instruction *NewRelease = *NI;
+      DenseMap<Value *, RRInfo>::const_iterator It =
+        Releases.find(NewRelease);
+      assert(It != Releases.end());
+      const RRInfo &NewReleaseRRI = It->second;
+      KnownSafeBU &= NewReleaseRRI.KnownSafe;
+      for (SmallPtrSet<Instruction *, 2>::const_iterator
+           LI = NewReleaseRRI.Calls.begin(),
+           LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewReleaseRetain = *LI;
+        MapVector<Value *, RRInfo>::const_iterator Jt =
+          Retains.find(NewReleaseRetain);
+        if (Jt == Retains.end())
+          return false;
+        const RRInfo &NewReleaseRetainRRI = Jt->second;
+        assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+        if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+          unsigned PathCount =
+            BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+          OldDelta += PathCount;
+          OldCount += PathCount;
+
+          // Merge the IsRetainBlock values. The flag recorded by the first
+          // retain lives in RetainsToMove, so that is what later retains must
+          // be compared against.
+          if (FirstRetain) {
+            RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+            FirstRetain = false;
+          } else if (RetainsToMove.IsRetainBlock !=
+                       NewReleaseRetainRRI.IsRetainBlock)
+            // It's not possible to merge the sequences if one uses
+            // objc_retain and the other uses objc_retainBlock.
+            return false;
+
+          // Collect the optimal insertion points.
+          if (!KnownSafe)
+            for (SmallPtrSet<Instruction *, 2>::const_iterator
+                 RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+                 RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+                 RI != RE; ++RI) {
+              Instruction *RIP = *RI;
+              if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+                PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+                NewDelta += PathCount;
+                NewCount += PathCount;
+              }
+            }
+          NewRetains.push_back(NewReleaseRetain);
+        }
+      }
+    }
+    NewReleases.clear();
+    if (NewRetains.empty()) break;
+  }
+
+  // If the pointer is known incremented or nested, we can safely delete the
+  // pair regardless of what's between them.
+  if (KnownSafeTD || KnownSafeBU) {
+    RetainsToMove.ReverseInsertPts.clear();
+    ReleasesToMove.ReverseInsertPts.clear();
+    NewCount = 0;
+  } else {
+    // Determine whether the new insertion points we computed preserve the
+    // balance of retain and release calls through the program.
+    // TODO: If the fully aggressive solution isn't valid, try to find a
+    // less aggressive solution which is.
+    if (NewDelta != 0)
+      return false;
+  }
+
+  // Determine whether the original call points are balanced in the retain and
+  // release calls through the program. If not, conservatively don't touch
+  // them.
+  // TODO: It's theoretically possible to do code motion in this case, as
+  // long as the existing imbalances are maintained.
+  if (OldDelta != 0)
+    return false;
+
+  Changed = true;
+  assert(OldCount != 0 && "Unreachable code?");
+  NumRRs += OldCount - NewCount;
+  // Set to true if we completely removed any RR pairs.
+  // NOTE(review): this assigns rather than ORs, so a later call can reset a
+  // flag that an earlier call set to true -- confirm that is intended.
+  AnyPairsCompletelyEliminated = NewCount == 0;
+
+  // We can move calls!
+  return true;
+}
+
+/// Walk every retain recorded in Retains, pair it up with its releases via
+/// ConnectTDBUTraversals, and move and/or delete the paired calls via
+/// MoveCalls.
+///
+/// \returns the AnyPairsCompletelyEliminated flag as last written by
+///          ConnectTDBUTraversals (false if it was never written).
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+                                   &BBStates,
+                                 MapVector<Value *, RRInfo> &Retains,
+                                 DenseMap<Value *, RRInfo> &Releases,
+                                 Module *M) {
+  // Scratch state shared by all iterations; reset at the bottom of the loop.
+  SmallVector<Instruction *, 8> DeadInsts;
+  SmallVector<Instruction *, 4> NewReleases;
+  SmallVector<Instruction *, 4> NewRetains;
+  RRInfo ReleasesToMove;
+  RRInfo RetainsToMove;
+  bool AnyPairsCompletelyEliminated = false;
+
+  // Visit each retain.
+  for (MapVector<Value *, RRInfo>::const_iterator RetainIt = Retains.begin(),
+       RetainEnd = Retains.end(); RetainIt != RetainEnd; ++RetainIt) {
+    Value *Key = RetainIt->first;
+    // A null key marks an entry that has been blotted.
+    if (!Key)
+      continue;
+
+    Instruction *Retain = cast<Instruction>(Key);
+
+    DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
+                 << "\n");
+
+    Value *Arg = GetObjCArg(Retain);
+
+    // If the object being released is in static or stack storage, we know it's
+    // not being managed by ObjC reference counting, so we can delete pairs
+    // regardless of what possible decrements or uses lie between them.
+    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+    // A constant pointer can't be pointing to an object on the heap. It may
+    // be reference-counted, but it won't be deleted.
+    if (const LoadInst *Load = dyn_cast<LoadInst>(Arg)) {
+      const GlobalVariable *Global =
+        dyn_cast<GlobalVariable>(
+          StripPointerCastsAndObjCCalls(Load->getPointerOperand()));
+      if (Global && Global->isConstant())
+        KnownSafe = true;
+    }
+
+    // Connect the dots between the top-down-collected RetainsToMove and
+    // bottom-up-collected ReleasesToMove to form sets of related calls.
+    // If everything checks out, move/delete some code.
+    NewRetains.push_back(Retain);
+    if (ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
+                              NewReleases, DeadInsts, RetainsToMove,
+                              ReleasesToMove, Arg, KnownSafe,
+                              AnyPairsCompletelyEliminated))
+      MoveCalls(Arg, RetainsToMove, ReleasesToMove,
+                Retains, Releases, DeadInsts, M);
+
+    // Clean up state for next retain.
+    NewReleases.clear();
+    NewRetains.clear();
+    RetainsToMove.clear();
+    ReleasesToMove.clear();
+  }
+
+  // Now that we're done moving everything, we can delete the newly dead
+  // instructions, as we no longer need them as insert points.
+  while (!DeadInsts.empty()) {
+    Instruction *Dead = DeadInsts.pop_back_val();
+    EraseInstruction(Dead);
+  }
+
+  return AnyPairsCompletelyEliminated;
+}
+
+/// Weak pointer optimizations.
+///
+/// Two passes over the instructions of F: the first replaces objc_loadWeak /
+/// objc_loadWeakRetained calls with an earlier value available in the same
+/// block (and deletes unused objc_loadWeak calls); the second removes an
+/// alloca together with all of its users when those users are only
+/// initWeak / storeWeak / destroyWeak calls. Sets Changed when it rewrites
+/// anything.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+ // itself because it uses AliasAnalysis and we need to do provenance
+ // queries instead.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ // Advance the iterator first so that Inst can be erased below.
+ Instruction *Inst = &*I++;
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
+ "\n");
+
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would want caching. A better approach would be to
+ // use the technique that EarlyCSE uses.
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ // Scan backwards from Inst towards the start of its block.
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ // Reached when the backwards scan stops, either because the load was
+ // replaced or because an earlier instruction may have changed the value.
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ // First pass over the users: bail out (goto done) unless every user is an
+ // initWeak, storeWeak or destroyWeak call.
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ const Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ // Second pass: erase every user, then the alloca itself.
+ // NOTE(review): the erased users may include the instruction that the
+ // outer iterator I already points at -- confirm this cannot invalidate I.
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ // These functions return their second argument.
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+ break;
+ case IC_DestroyWeak:
+ // No return value.
+ break;
+ default:
+ llvm_unreachable("alloca really is used!");
+ }
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+
+ DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
+
+}
+
+/// Run the flow analysis over F (Visit) and then the retain/release
+/// transformation (PerformCodePlacement) on its results.
+///
+/// \returns true only if PerformCodePlacement reported a completely
+///          eliminated pair and Visit reported nesting.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+  /// Per-block states for each identified object, filled in during the
+  /// traversal of the function.
+  DenseMap<const BasicBlock *, BBState> BBStates;
+
+  /// Results of the main flow analysis. These are keyed by Value* rather than
+  /// Instruction* so that the maps stay valid when we get around to rewriting
+  /// code and calls get replaced by arguments.
+  MapVector<Value *, RRInfo> Retains;
+  DenseMap<Value *, RRInfo> Releases;
+
+  // Analyze the CFG of the function, and all instructions.
+  const bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+  // Transform. This always runs, whatever the analysis reported.
+  const bool AnyPairsEliminated =
+    PerformCodePlacement(BBStates, Retains, Releases, F.getParent());
+
+  return AnyPairsEliminated && NestingDetected;
+}
+
+/// Look for this pattern:
+/// \code
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+/// \endcode
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+/// \code
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+/// \endcode
+/// convert the autorelease to autoreleaseRV.
+///
+/// Does nothing for functions that do not return a pointer. Only blocks whose
+/// last instruction is a ReturnInst are examined.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+
+    // Blocks that do not end in a return are skipped. This check must come
+    // before the debug output below, which dereferences Ret.
+    if (!Ret) continue;
+
+    DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    // The nested scopes below exist so that the gotos to next_block do not
+    // jump over initialized locals that are still in scope at the label.
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value.
+        if (AutoreleaseClass == IC_Autorelease) {
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
+                          "=> autoreleaseRV since it's returning a value.\n"
+                          " In: " << *Autorelease
+                       << "\n");
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          DEBUG(dbgs() << " Out: " << *Autorelease
+                       << "\n");
+          Autorelease->setTailCall(); // Always tail call autoreleaseRV.
+          AutoreleaseClass = IC_AutoreleaseRV;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        // Note that Retain need not be in BB.
+        FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
+          Changed = true;
+          ++NumRets;
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
+                       << "\n Erasing: "
+                       << *Autorelease << "\n");
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    // Reset the scratch sets before looking at the next block.
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+
+}
+
+/// Per-module setup: record whether the module uses ARC at all, look up the
+/// metadata kind IDs this pass uses, and reset the lazily-initialized runtime
+/// callees. Always returns false.
+bool ObjCARCOpt::doInitialization(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
+  // Intuitively, objc_retain and others are nocapture, however in practice
+  // they are not, because they return their argument value. And objc_release
+  // calls finalizers which can have arbitrary side effects.
+
+  // These are initialized lazily.
+  RetainCallee = 0;
+  RetainRVCallee = 0;
+  RetainBlockCallee = 0;
+  ReleaseCallee = 0;
+  AutoreleaseCallee = 0;
+  AutoreleaseRVCallee = 0;
+
+  // Identify the metadata kinds this pass reads and writes.
+  LLVMContext &Ctx = M.getContext();
+  ImpreciseReleaseMDKind = Ctx.getMDKindID("clang.imprecise_release");
+  CopyOnEscapeMDKind = Ctx.getMDKindID("clang.arc.copy_on_escape");
+  NoObjCARCExceptionsMDKind =
+    Ctx.getMDKindID("clang.arc.no_objc_arc_exceptions");
+
+  return false;
+}
+
+/// Run the individual ARC transformations on F, each one only when
+/// UsedInThisFunction shows that a relevant instruction class occurs.
+///
+/// \returns the value of Changed after all transformations have run.
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  // Bail out when the pass is disabled, or when nothing in the Module uses
+  // ARC.
+  if (!EnableARCOpts || !Run)
+    return false;
+
+  Changed = false;
+
+  DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  const int WeakClasses = (1 << IC_LoadWeak) |
+                          (1 << IC_LoadWeakRetained) |
+                          (1 << IC_StoreWeak) |
+                          (1 << IC_InitWeak) |
+                          (1 << IC_CopyWeak) |
+                          (1 << IC_MoveWeak) |
+                          (1 << IC_DestroyWeak);
+  if (UsedInThisFunction & WeakClasses)
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs: needs at least one kind of retain
+  // and a release.
+  const int RetainClasses = (1 << IC_Retain) |
+                            (1 << IC_RetainRV) |
+                            (1 << IC_RetainBlock);
+  if ((UsedInThisFunction & RetainClasses) &&
+      (UsedInThisFunction & (1 << IC_Release))) {
+    // Run OptimizeSequences until it either stops making changes or
+    // no retain+release pair nesting is detected.
+    while (OptimizeSequences(F)) {}
+  }
+
+  // Optimizations if objc_autorelease is used.
+  const int AutoreleaseClasses = (1 << IC_Autorelease) |
+                                 (1 << IC_AutoreleaseRV);
+  if (UsedInThisFunction & AutoreleaseClasses)
+    OptimizeReturns(F);
+
+  DEBUG(dbgs() << "\n");
+
+  return Changed;
+}
+
+/// Drop the state held by the provenance analysis object PA.
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+/// @}
+///
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
new file mode 100644
index 0000000..a841c64
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -0,0 +1,241 @@
+//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are IMHO too big to be in a header file.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "llvm/IR/Intrinsics.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// Print the enumerator name of Class (e.g. "IC_Retain") to OS and return OS.
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
+                                       const InstructionClass Class) {
+  // Map the class to its spelling first, then emit it in one place. There is
+  // deliberately no default label, so the switch lists every enumerator.
+  const char *Name = 0;
+  switch (Class) {
+  case IC_Retain:                   Name = "IC_Retain"; break;
+  case IC_RetainRV:                 Name = "IC_RetainRV"; break;
+  case IC_RetainBlock:              Name = "IC_RetainBlock"; break;
+  case IC_Release:                  Name = "IC_Release"; break;
+  case IC_Autorelease:              Name = "IC_Autorelease"; break;
+  case IC_AutoreleaseRV:            Name = "IC_AutoreleaseRV"; break;
+  case IC_AutoreleasepoolPush:      Name = "IC_AutoreleasepoolPush"; break;
+  case IC_AutoreleasepoolPop:       Name = "IC_AutoreleasepoolPop"; break;
+  case IC_NoopCast:                 Name = "IC_NoopCast"; break;
+  case IC_FusedRetainAutorelease:   Name = "IC_FusedRetainAutorelease"; break;
+  case IC_FusedRetainAutoreleaseRV: Name = "IC_FusedRetainAutoreleaseRV"; break;
+  case IC_LoadWeakRetained:         Name = "IC_LoadWeakRetained"; break;
+  case IC_StoreWeak:                Name = "IC_StoreWeak"; break;
+  case IC_InitWeak:                 Name = "IC_InitWeak"; break;
+  case IC_LoadWeak:                 Name = "IC_LoadWeak"; break;
+  case IC_MoveWeak:                 Name = "IC_MoveWeak"; break;
+  case IC_CopyWeak:                 Name = "IC_CopyWeak"; break;
+  case IC_DestroyWeak:              Name = "IC_DestroyWeak"; break;
+  case IC_StoreStrong:              Name = "IC_StoreStrong"; break;
+  case IC_CallOrUser:               Name = "IC_CallOrUser"; break;
+  case IC_Call:                     Name = "IC_Call"; break;
+  case IC_User:                     Name = "IC_User"; break;
+  case IC_None:                     Name = "IC_None"; break;
+  }
+
+  // A value matching none of the enumerators leaves Name unset.
+  if (!Name)
+    llvm_unreachable("Unknown instruction class!");
+  return OS << Name;
+}
+
+/// Classify F by its name and by the number and types of its parameters.
+///
+/// A name is only recognized when the parameter list has the matching shape:
+/// no parameters, a single i8* or i8** parameter, or an i8** parameter
+/// followed by an i8* or i8** parameter. Everything else is IC_CallOrUser.
+InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) {
+  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+  // No arguments.
+  if (AI == AE)
+    return StringSwitch<InstructionClass>(F->getName())
+      .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+      .Default(IC_CallOrUser);
+
+  // One argument.
+  const Argument *A0 = AI++;
+  if (AI == AE) {
+    // Argument is a pointer.
+    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+      Type *ETy = PTy->getElementType();
+      // Argument is i8*.
+      if (ETy->isIntegerTy(8))
+        return StringSwitch<InstructionClass>(F->getName())
+          .Case("objc_retain", IC_Retain)
+          .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+          .Case("objc_retainBlock", IC_RetainBlock)
+          .Case("objc_release", IC_Release)
+          .Case("objc_autorelease", IC_Autorelease)
+          .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+          .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+          .Case("objc_retainedObject", IC_NoopCast)
+          .Case("objc_unretainedObject", IC_NoopCast)
+          .Case("objc_unretainedPointer", IC_NoopCast)
+          .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+          .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+          .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+          .Default(IC_CallOrUser);
+
+      // Argument is i8**
+      if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+        if (Pte->getElementType()->isIntegerTy(8))
+          return StringSwitch<InstructionClass>(F->getName())
+            .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+            .Case("objc_loadWeak", IC_LoadWeak)
+            .Case("objc_destroyWeak", IC_DestroyWeak)
+            .Default(IC_CallOrUser);
+    }
+
+    // A single argument of any other type. Return here instead of falling
+    // through: AI is already at the end of the argument list, so it must not
+    // be advanced again to fetch a second argument.
+    return IC_CallOrUser;
+  }
+
+  // Two arguments, first is i8**.
+  const Argument *A1 = AI++;
+  if (AI == AE)
+    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+      if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+        if (Pte->getElementType()->isIntegerTy(8))
+          if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+            Type *ETy1 = PTy1->getElementType();
+            // Second argument is i8*
+            if (ETy1->isIntegerTy(8))
+              return StringSwitch<InstructionClass>(F->getName())
+                .Case("objc_storeWeak", IC_StoreWeak)
+                .Case("objc_initWeak", IC_InitWeak)
+                .Case("objc_storeStrong", IC_StoreStrong)
+                .Default(IC_CallOrUser);
+            // Second argument is i8**.
+            if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+              if (Pte1->getElementType()->isIntegerTy(8))
+                return StringSwitch<InstructionClass>(F->getName())
+                  .Case("objc_moveWeak", IC_MoveWeak)
+                  .Case("objc_copyWeak", IC_CopyWeak)
+                  .Default(IC_CallOrUser);
+          }
+
+  // Anything else.
+  return IC_CallOrUser;
+}
+
+/// \brief Determine what kind of construct V is.
+///
+/// Values that are not instructions are IC_None. Calls to a known function are
+/// classified by GetFunctionClass; the intrinsics listed below are IC_None;
+/// other calls and all invokes are classified by GetCallSiteClass. The opcodes
+/// listed explicitly are IC_None, an ICmp is IC_User only when its second
+/// operand is a potential retainable object pointer, and any other instruction
+/// is IC_User when any of its operands is one.
+InstructionClass
+llvm::objcarc::GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Any instruction other than bitcast and gep with a pointer operand have a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ case Intrinsic::objectsize: case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ break;
+ }
+ }
+ // Indirect calls and calls not recognized above.
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ // Leaves the switch; these end up as IC_None at the bottom.
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ // NOTE(review): only operand 1 is inspected -- presumably this relies on
+ // constants having been canonicalized to the right-hand side; confirm.
+ if (IsPotentialRetainableObjPtr(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ // Note that this includes both operands of a Store: while the first
+ // operand isn't actually being dereferenced, it is being stored to
+ // memory where we can no longer track who might read it and dereference
+ // it, so we have to consider it potentially used.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialRetainableObjPtr(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
new file mode 100644
index 0000000..ae3c628
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -0,0 +1,177 @@
+//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if, locally, two pointers could share
+/// the same provenance source and thus potentially be related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+/// Decide whether select \p A may be related to \p B by examining the values
+/// the select can produce.
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
+                                       const Value *B) {
+  const Value *TrueArm = A->getTrueValue();
+  const Value *FalseArm = A->getFalseValue();
+
+  // When B is also a select on the very same condition, a more precise check
+  // is possible: only the values on corresponding arms need to be compared.
+  const SelectInst *OtherSel = dyn_cast<SelectInst>(B);
+  if (OtherSel && OtherSel->getCondition() == A->getCondition()) {
+    if (related(TrueArm, OtherSel->getTrueValue()))
+      return true;
+    return related(FalseArm, OtherSel->getFalseValue());
+  }
+
+  // Otherwise test each arm of A against B on its own.
+  if (related(TrueArm, B))
+    return true;
+  return related(FalseArm, B);
+}
+
+/// Decide whether PHI node \p A may be related to \p B by examining the
+/// incoming values of A.
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
+                                    const Value *B) {
+  const unsigned NumIncoming = A->getNumIncomingValues();
+
+  // If B is a PHI in the same block as A, it is both more precise and more
+  // efficient to compare the two PHIs edge by edge.
+  const PHINode *OtherPHI = dyn_cast<PHINode>(B);
+  if (OtherPHI && OtherPHI->getParent() == A->getParent()) {
+    for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+      const Value *Theirs =
+        OtherPHI->getIncomingValueForBlock(A->getIncomingBlock(Idx));
+      if (related(A->getIncomingValue(Idx), Theirs))
+        return true;
+    }
+    return false;
+  }
+
+  // General case: compare every distinct incoming value of A against B,
+  // skipping values that have already been examined.
+  SmallPtrSet<const Value *, 4> Seen;
+  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+    const Value *Src = A->getIncomingValue(Idx);
+    if (!Seen.insert(Src))
+      continue;
+    if (related(Src, B))
+      return true;
+  }
+
+  // None of the incoming values turned out to be related to B.
+  return false;
+}
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+///
+/// The users of P are walked transitively with a worklist. Passing the value
+/// to a call, or storing *through* it, does not count as storing it.
+static bool IsStoredObjCPointer(const Value *P) {
+  SmallVector<const Value *, 8> Pending;
+  SmallPtrSet<const Value *, 8> Seen;
+  Seen.insert(P);
+  Pending.push_back(P);
+
+  while (!Pending.empty()) {
+    const Value *Cur = Pending.pop_back_val();
+    for (Value::const_use_iterator It = Cur->use_begin(),
+         End = Cur->use_end(); It != End; ++It) {
+      const User *TheUser = *It;
+
+      // The pointer is passed as an argument, ignore this.
+      if (isa<CallInst>(TheUser))
+        continue;
+
+      if (isa<StoreInst>(TheUser)) {
+        // Operand 0 of a store is the value being stored: the pointer itself
+        // is written to memory.
+        if (It.getOperandNo() == 0)
+          return true;
+        // Otherwise the pointer is only stored through.
+        continue;
+      }
+
+      // Assume the worst once a ptrtoint is involved.
+      // NOTE(review): this tests the value being visited (Cur), not the user;
+      // confirm that is intended rather than isa<PtrToIntInst>(TheUser).
+      if (isa<PtrToIntInst>(Cur))
+        return true;
+
+      // Follow this user's own uses later, but only once.
+      if (Seen.insert(TheUser))
+        Pending.push_back(TheUser);
+    }
+  }
+
+  // Everything checked out.
+  return false;
+}
+
+/// Uncached core of related(): decide whether \p A and \p B may be related.
+/// Returns true, the conservative answer, whenever no rule proves otherwise.
+bool ProvenanceAnalysis::relatedCheck(const Value *A,
+                                      const Value *B) {
+  // Look through provenance pass-throughs on both sides.
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Identical underlying pointers are trivially related.
+  if (A == B)
+    return true;
+
+  // Let regular AliasAnalysis settle the query when it has a definite answer.
+  switch (AA->alias(A, B)) {
+  case AliasAnalysis::MustAlias:
+  case AliasAnalysis::PartialAlias:
+    return true;
+  case AliasAnalysis::NoAlias:
+    return false;
+  case AliasAnalysis::MayAlias:
+    // Inconclusive; continue with the ObjC-specific reasoning below.
+    break;
+  }
+
+  const bool AIsIdentified = IsObjCIdentifiedObject(A);
+  const bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+  // An ObjC-identified object can't alias a load if it is never locally
+  // stored, so for an identified/load pair the answer is whether the
+  // identified pointer has an obvious escape through a store.
+  if (AIsIdentified && isa<LoadInst>(B))
+    return IsStoredObjCPointer(A);
+  if (BIsIdentified && isa<LoadInst>(A))
+    return IsStoredObjCPointer(B);
+
+  // Both pointers are identified and escapes aren't an evident problem.
+  if (AIsIdentified && BIsIdentified)
+    return false;
+
+  // PHIs and selects get dedicated handling, with the PHI/select operand
+  // always passed first.
+  if (const PHINode *PhiA = dyn_cast<PHINode>(A))
+    return relatedPHI(PhiA, B);
+  if (const PHINode *PhiB = dyn_cast<PHINode>(B))
+    return relatedPHI(PhiB, A);
+  if (const SelectInst *SelA = dyn_cast<SelectInst>(A))
+    return relatedSelect(SelA, B);
+  if (const SelectInst *SelB = dyn_cast<SelectInst>(B))
+    return relatedSelect(SelB, A);
+
+  // Nothing proved the pointers unrelated: be conservative.
+  return true;
+}
+
+/// Cached front end for relatedCheck(). The pair (A, B) is treated as
+/// unordered: the two pointers are put into a canonical order before the
+/// cache is consulted.
+bool ProvenanceAnalysis::related(const Value *A,
+                                 const Value *B) {
+  // Canonicalize the pair so (A, B) and (B, A) share one cache slot.
+  if (A > B)
+    std::swap(A, B);
+  const ValuePairTy Key(A, B);
+
+  // Seed the cache with the conservative answer. A failed insertion means an
+  // answer (possibly this very placeholder, for a recursive query) is already
+  // recorded, so just report it.
+  std::pair<CachedResultsTy::iterator, bool> Inserted =
+    CachedResults.insert(std::make_pair(Key, true));
+  if (Inserted.second) {
+    const bool Answer = relatedCheck(A, B);
+    // relatedCheck() can re-enter related() and add entries to the map, so
+    // the slot is looked up again by key instead of reusing the iterator.
+    CachedResults[Key] = Answer;
+    return Answer;
+  }
+
+  return Inserted.first->second;
+}
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
new file mode 100644
index 0000000..ec449fd
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -0,0 +1,80 @@
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if, locally, two pointers could share
+/// the same provenance source and thus potentially be related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes them
+/// by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions are
+/// used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+ class Value;
+ class AliasAnalysis;
+ class PHINode;
+ class SelectInst;
+}
+
+namespace llvm {
+namespace objcarc {
+
+/// \brief This is similar to BasicAliasAnalysis, and it uses many of the same
+/// techniques, except it uses special ObjC-specific reasoning about pointer
+/// relationships.
+///
+/// In this context ``Provenance'' is defined as the history of an object's
+/// ownership. Thus ``Provenance Analysis'' is defined by using the notion of
+/// an ``independent provenance source'' of a pointer to determine whether or
+/// not two pointers have the same provenance source and thus could
+/// potentially be related.
+class ProvenanceAnalysis {
+  /// The alias analysis installed with setAA(); null until then.
+  AliasAnalysis *AA;
+
+  typedef std::pair<const Value *, const Value *> ValuePairTy;
+  typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+
+  /// Answers already computed by related(), keyed by pointer pair.
+  CachedResultsTy CachedResults;
+
+  bool relatedCheck(const Value *A, const Value *B);
+  bool relatedSelect(const SelectInst *A, const Value *B);
+  bool relatedPHI(const PHINode *A, const Value *B);
+
+  // Not copyable.
+  void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+  ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
+
+public:
+  /// Start with no alias analysis attached, so that getAA() has a
+  /// well-defined (null) value before setAA() is called.
+  ProvenanceAnalysis() : AA(0) {}
+
+  /// Install the alias analysis to use.
+  void setAA(AliasAnalysis *aa) { AA = aa; }
+
+  /// Return the alias analysis last passed to setAA(), or null if none.
+  AliasAnalysis *getAA() const { return AA; }
+
+  /// Return true if \p A and \p B may be related; results are cached.
+  bool related(const Value *A, const Value *B);
+
+  /// Drop every cached result.
+  void clear() {
+    CachedResults.clear();
+  }
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index b3fc6e3..fd55e08 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
- ObjCARC.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index d513c96..d71dd5d 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -729,9 +729,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- Attribute CallerRetAttr = F->getAttributes().getRetAttributes();
- if (CallerRetAttr.hasAttribute(Attribute::ZExt) ||
- CallerRetAttr.hasAttribute(Attribute::SExt))
+ AttributeSet CallerAttrs = F->getAttributes();
+ if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
// Make sure there are no instructions between the PHI and return, or that the
@@ -788,10 +788,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- Attribute CalleeRetAttr = CS.getAttributes().getRetAttributes();
- if (AttrBuilder(CalleeRetAttr).
+ AttributeSet CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CallerRetAttr).
+          // The right-hand side is the caller's return attributes; comparing
+          // CalleeAttrs with itself would make this check a no-op.
+          AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
removeAttribute(Attribute::NoAlias))
continue;
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 4c3631b..995782e 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -21,6 +21,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Value *Incoming = P->getIncomingValue(i);
if (isa<Constant>(Incoming)) continue;
- Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
- P->getIncomingBlock(i),
- BB);
- if (!C) continue;
+ Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB);
- P->setIncomingValue(i, C);
+ // Look if the incoming value is a select with a constant but LVI tells us
+ // that the incoming value can never be that constant. In that case replace
+ // the incoming value with the other value of the select. This often allows
+ // us to remove the select later.
+ if (!V) {
+ SelectInst *SI = dyn_cast<SelectInst>(Incoming);
+ if (!SI) continue;
+
+ Constant *C = dyn_cast<Constant>(SI->getFalseValue());
+ if (!C) continue;
+
+ if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
+ P->getIncomingBlock(i), BB) !=
+ LazyValueInfo::False)
+ continue;
+
+ DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
+ V = SI->getTrueValue();
+ }
+
+ P->setIncomingValue(i, V);
Changed = true;
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index fe3acbf..57432c7 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
// other store to the same object.
- const DataLayout &TD = *AA.getDataLayout();
+ const DataLayout *TD = AA.getDataLayout();
- const Value *UO1 = GetUnderlyingObject(P1, &TD),
- *UO2 = GetUnderlyingObject(P2, &TD);
+ const Value *UO1 = GetUnderlyingObject(P1, TD),
+ *UO2 = GetUnderlyingObject(P2, TD);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 14201b9..c04b447 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+ Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD);
if (StoreBase != LoadBase)
return -1;
@@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD);
unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
unsigned Size = MemoryDependenceAnalysis::
@@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool isSinglePred = false;
bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
- isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
return false;
@@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
assert(TmpBB);
LoadBB = TmpBB;
- // FIXME: It is extremely unclear what this loop is doing, other than
- // artificially restricting loadpre.
- if (isSinglePred) {
- bool isHot = false;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- const AvailableValueInBlock &AV = ValuesPerBlock[i];
- if (AV.isSimpleValue())
- // "Hot" Instruction is in some loop (because it dominates its dep.
- // instruction).
- if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
- if (DT->dominates(LI, I)) {
- isHot = true;
- break;
- }
- }
-
- // We are interested only in "hot" instructions. We don't want to do any
- // mis-optimizations here.
- if (!isHot)
- return false;
- }
-
// Check to see how many predecessors have the loaded value fully
// available.
DenseMap<BasicBlock*, Value*> PredLoads;
@@ -2371,8 +2347,8 @@ bool GVN::processBlock(BasicBlock *BB) {
E = InstrsToErase.end(); I != E; ++I) {
DEBUG(dbgs() << "GVN removed: " << **I << '\n');
if (MD) MD->removeInstruction(*I);
- (*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
+ (*I)->eraseFromParent();
}
InstrsToErase.clear();
@@ -2389,7 +2365,7 @@ bool GVN::processBlock(BasicBlock *BB) {
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
- DenseMap<BasicBlock*, Value*> predMap;
+ SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
BasicBlock *CurrentBlock = *DI;
@@ -2445,19 +2421,22 @@ bool GVN::performPRE(Function &F) {
if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!DT->dominates(&F.getEntryBlock(), P)) {
+ } else if (!DT->isReachableFromEntry(P)) {
NumWithout = 2;
break;
}
Value* predV = findLeader(P, ValNo);
if (predV == 0) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
+ /* CurInst dominates this predecessor. */
NumWithout = 2;
+ break;
} else {
- predMap[P] = predV;
+ predMap.push_back(std::make_pair(predV, P));
++NumWith;
}
}
@@ -2504,15 +2483,14 @@ bool GVN::performPRE(Function &F) {
// the PRE predecessor. This is typically because of loads which
// are not value numbered precisely.
if (!success) {
- delete PREInstr;
DEBUG(verifyRemoved(PREInstr));
+ delete PREInstr;
continue;
}
PREInstr->insertBefore(PREPred->getTerminator());
PREInstr->setName(CurInst->getName() + ".pre");
PREInstr->setDebugLoc(CurInst->getDebugLoc());
- predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
++NumGVNPRE;
@@ -2520,13 +2498,14 @@ bool GVN::performPRE(Function &F) {
addToLeaderTable(ValNo, PREInstr, PREPred);
// Create a PHI to make the value available in this block.
- pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock);
- PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE),
+ PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(),
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- BasicBlock *P = *PI;
- Phi->addIncoming(predMap[P], P);
+ for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
+ if (Value *V = predMap[i].first)
+ Phi->addIncoming(V, predMap[i].second);
+ else
+ Phi->addIncoming(PREInstr, PREPred);
}
VN.add(Phi, ValNo);
@@ -2551,8 +2530,8 @@ bool GVN::performPRE(Function &F) {
DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD) MD->removeInstruction(CurInst);
- CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
+ CurInst->eraseFromParent();
Changed = true;
}
}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index dc6bef7..f94cd2a 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -440,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
}
// Only these instructions are hoistable/sinkable.
- bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
- isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
- isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
- isa<ExtractElementInst>(I) ||
- isa<ShuffleVectorInst>(I));
- if (!HoistableKind)
- return false;
+ if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) &&
+ !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) &&
+ !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
+ !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) &&
+ !isa<InsertValueInst>(I))
+ return false;
return isSafeToExecuteUnconditionally(I);
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c4f9012..8258719 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -407,7 +407,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
{
- if (DefX2->getOpcode() != Instruction::And)
+ if (!DefX2 || DefX2->getOpcode() != Instruction::And)
return false;
BinaryOperator *SubOneOp;
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index c48808f..a23860a 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 0ea80f3..e98ae95 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -51,6 +52,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -59,11 +61,13 @@ namespace {
private:
LoopInfo *LI;
+ const TargetTransformInfo *TTI;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
/// the loop is rotated at least once.
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
+ TTI = &getAnalysis<TargetTransformInfo>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@@ -278,7 +283,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// duplicate blocks inside it.
{
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
<< " instructions: "; L->dump());
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c7b853e..4e4cb86 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -58,6 +58,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
@@ -237,7 +238,7 @@ struct Formula {
/// BaseRegs - The list of "base" registers for this use. When this is
/// non-empty,
- SmallVector<const SCEV *, 2> BaseRegs;
+ SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
/// when Scale is not zero.
@@ -1087,19 +1088,19 @@ namespace {
/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*.
struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 2> getEmptyKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
- static SmallVector<const SCEV *, 2> getTombstoneKey() {
- SmallVector<const SCEV *, 2> V;
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
- static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) {
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
unsigned Result = 0;
for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(),
E = V.end(); I != E; ++I)
@@ -1107,8 +1108,8 @@ struct UniquifierDenseMapInfo {
return Result;
}
- static bool isEqual(const SmallVector<const SCEV *, 2> &LHS,
- const SmallVector<const SCEV *, 2> &RHS) {
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
@@ -1119,7 +1120,7 @@ struct UniquifierDenseMapInfo {
/// the user itself, and information about how the use may be satisfied.
/// TODO: Represent multiple users of the same expression in common?
class LSRUse {
- DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier;
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
/// KindType - An enum for a kind of use, indicating what types of
@@ -1178,7 +1179,7 @@ public:
/// HasFormula - Test whether this use as a formula which has the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -1188,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
- SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
std::sort(Key.begin(), Key.end());
@@ -2536,6 +2537,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Add this IV user to the end of the chain.
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
+ IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
// This chain's NearUsers become FarUsers.
@@ -2553,8 +2555,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
for (Value::use_iterator UseIter = IVOper->use_begin(),
UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
- if (!OtherUse || OtherUse == UserInst)
+ if (!OtherUse)
continue;
+ // Uses in the chain will no longer be uses if the chain is formed.
+ // Include the head of the chain in this iteration (not Chain.begin()).
+ IVChain::const_iterator IncIter = Chain.Incs.begin();
+ IVChain::const_iterator IncEnd = Chain.Incs.end();
+ for( ; IncIter != IncEnd; ++IncIter) {
+ if (IncIter->UserInst == OtherUse)
+ break;
+ }
+ if (IncIter != IncEnd)
+ continue;
+
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
@@ -2891,7 +2904,6 @@ void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE);
- F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
@@ -2903,6 +2915,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
+ F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
@@ -3656,7 +3669,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// Collect the best formula for each unique set of shared registers. This
// is reset for each use.
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo>
+ typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
@@ -3691,7 +3704,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
dbgs() << "\n");
}
else {
- SmallVector<const SCEV *, 2> Key;
+ SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
@@ -3837,83 +3850,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
/// for expressions like A, A+1, A+2, etc., allocate a single register for
/// them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
- if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
- DEBUG(dbgs() << "The search space is too complex.\n");
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
- DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
- "separated by a constant offset will use the same "
- "registers.\n");
+ DEBUG(dbgs() << "The search space is too complex.\n"
+ "Narrowing the search space by assuming that uses separated "
+ "by a constant offset will use the same registers.\n");
- // This is especially useful for unrolled loops.
+ // This is especially useful for unrolled loops.
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
- LSRUse &LU = Uses[LUIdx];
- for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
- E = LU.Formulae.end(); I != E; ++I) {
- const Formula &F = *I;
- if (F.BaseOffset != 0 && F.Scale == 0) {
- if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
- if (reconcileNewOffset(*LUThatHas, F.BaseOffset,
- /*HasBaseReg=*/false,
- LU.Kind, LU.AccessTy)) {
- DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
- dbgs() << '\n');
-
- LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
-
- // Update the relocs to reference the new use.
- for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
- E = Fixups.end(); I != E; ++I) {
- LSRFixup &Fixup = *I;
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.BaseOffset;
- // Add the new offset to LUThatHas' offset list.
- if (LUThatHas->Offsets.back() != Fixup.Offset) {
- LUThatHas->Offsets.push_back(Fixup.Offset);
- if (Fixup.Offset > LUThatHas->MaxOffset)
- LUThatHas->MaxOffset = Fixup.Offset;
- if (Fixup.Offset < LUThatHas->MinOffset)
- LUThatHas->MinOffset = Fixup.Offset;
- }
- DEBUG(dbgs() << "New fixup has offset "
- << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
- }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseOffset == 0 || F.Scale != 0)
+ continue;
- // Delete formulae from the new use which are no longer legal.
- bool Any = false;
- for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
- Formula &F = LUThatHas->Formulae[i];
- if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, F)) {
- DEBUG(dbgs() << " Deleting "; F.print(dbgs());
- dbgs() << '\n');
- LUThatHas->DeleteFormula(F);
- --i;
- --e;
- Any = true;
- }
- }
- if (Any)
- LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+ LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
+ if (!LUThatHas)
+ continue;
- // Delete the old use.
- DeleteUse(LU, LUIdx);
- --LUIdx;
- --NumUses;
- break;
- }
+ if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
+ LU.Kind, LU.AccessTy))
+ continue;
+
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.BaseOffset;
+ // Add the new offset to LUThatHas' offset list.
+ if (LUThatHas->Offsets.back() != Fixup.Offset) {
+ LUThatHas->Offsets.push_back(Fixup.Offset);
+ if (Fixup.Offset > LUThatHas->MaxOffset)
+ LUThatHas->MaxOffset = Fixup.Offset;
+ if (Fixup.Offset < LUThatHas->MinOffset)
+ LUThatHas->MinOffset = Fixup.Offset;
}
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
}
- }
- DEBUG(dbgs() << "After pre-selection:\n";
- print_uses(dbgs()));
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, F)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Delete the old use.
+ DeleteUse(LU, LUIdx);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
}
+
+ DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index e0f915b..80d060b 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
@@ -90,6 +91,7 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
@@ -101,6 +103,7 @@ namespace {
char LoopUnroll::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -113,11 +116,12 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable, const DataLayout *TD) {
+ bool &NotDuplicatable,
+ const TargetTransformInfo &TTI) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TD);
+ Metrics.analyzeBasicBlock(*I, TTI);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
@@ -134,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LoopInfo *LI = &getAnalysis<LoopInfo>();
ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
@@ -181,11 +186,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Enforce the threshold.
if (Threshold != NoThreshold) {
- const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
- notDuplicatable, TD);
+ notDuplicatable, TTI);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (notDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 68d4423..0e8199f 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -37,6 +37,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -101,7 +102,7 @@ namespace {
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop* L);
+ bool countLoop(const Loop* L, const TargetTransformInfo &TTI);
// Clean all data related to given loop.
void forgetLoop(const Loop* L);
@@ -170,6 +171,7 @@ namespace {
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfo>();
}
private:
@@ -221,7 +223,7 @@ namespace {
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop* L) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
std::pair<LoopPropsMapIt, bool> InsertRes =
LoopsProperties.insert(std::make_pair(L, LoopProperties()));
@@ -243,7 +245,7 @@ bool LUAnalysisCache::countLoop(const Loop* L) {
for (Loop::block_iterator I = L->block_begin(),
E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
+ Metrics.analyzeBasicBlock(*I, TTI);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
@@ -334,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -424,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Probably we reach the quota of branches for this loop. If so
// stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop))
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
deleted file mode 100644
index e6ec841..0000000
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ /dev/null
@@ -1,4354 +0,0 @@
-//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines ObjC ARC optimizations. ARC stands for
-// Automatic Reference Counting and is a system for managing reference counts
-// for objects in Objective C.
-//
-// The optimizations performed include elimination of redundant, partially
-// redundant, and inconsequential reference count operations, elimination of
-// redundant weak pointer operations, pattern-matching and replacement of
-// low-level operations into higher-level operations, and numerous minor
-// simplifications.
-//
-// This file also defines a simple ARC-aware AliasAnalysis.
-//
-// WARNING: This file knows about certain library functions. It recognizes them
-// by name, and hardwires knowledge of their semantics.
-//
-// WARNING: This file knows about how certain Objective-C library functions are
-// used. Naive LLVM IR transformations which would otherwise be
-// behavior-preserving may break these assumptions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "objc-arc"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-// A handy option to enable/disable all optimizations in this file.
-static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
-
-//===----------------------------------------------------------------------===//
-// Misc. Utilities
-//===----------------------------------------------------------------------===//
-
-namespace {
- /// MapVector - An associative container with fast insertion-order
- /// (deterministic) iteration over its elements. Plus the special
- /// blot operation.
- template<class KeyT, class ValueT>
- class MapVector {
- /// Map - Map keys to indices in Vector.
- typedef DenseMap<KeyT, size_t> MapTy;
- MapTy Map;
-
- /// Vector - Keys and values.
- typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
- VectorTy Vector;
-
- public:
- typedef typename VectorTy::iterator iterator;
- typedef typename VectorTy::const_iterator const_iterator;
- iterator begin() { return Vector.begin(); }
- iterator end() { return Vector.end(); }
- const_iterator begin() const { return Vector.begin(); }
- const_iterator end() const { return Vector.end(); }
-
-#ifdef XDEBUG
- ~MapVector() {
- assert(Vector.size() >= Map.size()); // May differ due to blotting.
- for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
- I != E; ++I) {
- assert(I->second < Vector.size());
- assert(Vector[I->second].first == I->first);
- }
- for (typename VectorTy::const_iterator I = Vector.begin(),
- E = Vector.end(); I != E; ++I)
- assert(!I->first ||
- (Map.count(I->first) &&
- Map[I->first] == size_t(I - Vector.begin())));
- }
-#endif
-
- ValueT &operator[](const KeyT &Arg) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Arg, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector[Num].second;
- }
- return Vector[Pair.first->second].second;
- }
-
- std::pair<iterator, bool>
- insert(const std::pair<KeyT, ValueT> &InsertPair) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(InsertPair.first, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(InsertPair);
- return std::make_pair(Vector.begin() + Num, true);
- }
- return std::make_pair(Vector.begin() + Pair.first->second, false);
- }
-
- const_iterator find(const KeyT &Key) const {
- typename MapTy::const_iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- /// blot - This is similar to erase, but instead of removing the element
- /// from the vector, it just zeros out the key in the vector. This leaves
- /// iterators intact, but clients must be prepared for zeroed-out keys when
- /// iterating.
- void blot(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return;
- Vector[It->second].first = KeyT();
- Map.erase(It);
- }
-
- void clear() {
- Map.clear();
- Vector.clear();
- }
- };
-}
-
-//===----------------------------------------------------------------------===//
-// ARC Utilities.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/Utils/Local.h"
-
-namespace {
- /// InstructionClass - A simple classification for instructions.
- enum InstructionClass {
- IC_Retain, ///< objc_retain
- IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
- IC_RetainBlock, ///< objc_retainBlock
- IC_Release, ///< objc_release
- IC_Autorelease, ///< objc_autorelease
- IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
- IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
- IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
- IC_NoopCast, ///< objc_retainedObject, etc.
- IC_FusedRetainAutorelease, ///< objc_retainAutorelease
- IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- IC_StoreWeak, ///< objc_storeWeak (primitive)
- IC_InitWeak, ///< objc_initWeak (derived)
- IC_LoadWeak, ///< objc_loadWeak (derived)
- IC_MoveWeak, ///< objc_moveWeak (derived)
- IC_CopyWeak, ///< objc_copyWeak (derived)
- IC_DestroyWeak, ///< objc_destroyWeak (derived)
- IC_StoreStrong, ///< objc_storeStrong (derived)
- IC_CallOrUser, ///< could call objc_release and/or "use" pointers
- IC_Call, ///< could call objc_release
- IC_User, ///< could "use" a pointer
- IC_None ///< anything else
- };
-}
-
-/// IsPotentialUse - Test whether the given value is possible a
-/// reference-counted pointer.
-static bool IsPotentialUse(const Value *Op) {
- // Pointers to static or stack storage are not reference-counted pointers.
- if (isa<Constant>(Op) || isa<AllocaInst>(Op))
- return false;
- // Special arguments are not reference-counted.
- if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasByValAttr() ||
- Arg->hasNestAttr() ||
- Arg->hasStructRetAttr())
- return false;
- // Only consider values with pointer types.
- // It seemes intuitive to exclude function pointer types as well, since
- // functions are never reference-counted, however clang occasionally
- // bitcasts reference-counted pointers to function-pointer type
- // temporarily.
- PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty)
- return false;
- // Conservatively assume anything else is a potential use.
- return true;
-}
-
-/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
-/// of construct CS is.
-static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (IsPotentialUse(*I))
- return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
-
- return CS.onlyReadsMemory() ? IC_None : IC_Call;
-}
-
-/// GetFunctionClass - Determine if F is one of the special known Functions.
-/// If it isn't, return IC_CallOrUser.
-static InstructionClass GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No arguments.
- if (AI == AE)
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
- .Default(IC_CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_retain", IC_Retain)
- .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
- .Case("objc_retainBlock", IC_RetainBlock)
- .Case("objc_release", IC_Release)
- .Case("objc_autorelease", IC_Autorelease)
- .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
- .Case("objc_retainedObject", IC_NoopCast)
- .Case("objc_unretainedObject", IC_NoopCast)
- .Case("objc_unretainedPointer", IC_NoopCast)
- .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
- .Default(IC_CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
- .Case("objc_loadWeak", IC_LoadWeak)
- .Case("objc_destroyWeak", IC_DestroyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Two arguments, first is i8**.
- const Argument *A1 = AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_storeWeak", IC_StoreWeak)
- .Case("objc_initWeak", IC_InitWeak)
- .Case("objc_storeStrong", IC_StoreStrong)
- .Default(IC_CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_moveWeak", IC_MoveWeak)
- .Case("objc_copyWeak", IC_CopyWeak)
- .Default(IC_CallOrUser);
- }
-
- // Anything else.
- return IC_CallOrUser;
-}
-
-/// GetInstructionClass - Determine what kind of construct V is.
-static InstructionClass GetInstructionClass(const Value *V) {
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
- // Any instruction other than bitcast and gep with a pointer operand have a
- // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
- // to a subsequent use, rather than using it themselves, in this sense.
- // As a short cut, several other opcodes are known to have no pointer
- // operands of interest. And ret is never followed by a release, so it's
- // not interesting to examine.
- switch (I->getOpcode()) {
- case Instruction::Call: {
- const CallInst *CI = cast<CallInst>(I);
- // Check for calls to special functions.
- if (const Function *F = CI->getCalledFunction()) {
- InstructionClass Class = GetFunctionClass(F);
- if (Class != IC_CallOrUser)
- return Class;
-
- // None of the intrinsic functions do objc_release. For intrinsics, the
- // only question is whether or not they may be users.
- switch (F->getIntrinsicID()) {
- case Intrinsic::returnaddress: case Intrinsic::frameaddress:
- case Intrinsic::stacksave: case Intrinsic::stackrestore:
- case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
- case Intrinsic::objectsize: case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
- // Short cut: Some intrinsics obviously don't use ObjC pointers.
- return IC_None;
- default:
- break;
- }
- }
- return GetCallSiteClass(CI);
- }
- case Instruction::Invoke:
- return GetCallSiteClass(cast<InvokeInst>(I));
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- case Instruction::Select: case Instruction::PHI:
- case Instruction::Ret: case Instruction::Br:
- case Instruction::Switch: case Instruction::IndirectBr:
- case Instruction::Alloca: case Instruction::VAArg:
- case Instruction::Add: case Instruction::FAdd:
- case Instruction::Sub: case Instruction::FSub:
- case Instruction::Mul: case Instruction::FMul:
- case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
- case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
- case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
- case Instruction::And: case Instruction::Or: case Instruction::Xor:
- case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
- case Instruction::IntToPtr: case Instruction::FCmp:
- case Instruction::FPTrunc: case Instruction::FPExt:
- case Instruction::FPToUI: case Instruction::FPToSI:
- case Instruction::UIToFP: case Instruction::SIToFP:
- case Instruction::InsertElement: case Instruction::ExtractElement:
- case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
- break;
- case Instruction::ICmp:
- // Comparing a pointer with null, or any other constant, isn't an
- // interesting use, because we don't care what the pointer points to, or
- // about the values of any other dynamic reference-counted pointers.
- if (IsPotentialUse(I->getOperand(1)))
- return IC_User;
- break;
- default:
- // For anything else, check all the operands.
- // Note that this includes both operands of a Store: while the first
- // operand isn't actually being dereferenced, it is being stored to
- // memory where we can no longer track who might read it and dereference
- // it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialUse(*OI))
- return IC_User;
- }
- }
-
- // Otherwise, it's totally inert for ARC purposes.
- return IC_None;
-}
-
-/// GetBasicInstructionClass - Determine what kind of construct V is. This is
-/// similar to GetInstructionClass except that it only detects objc runtine
-/// calls. This allows it to be faster.
-static InstructionClass GetBasicInstructionClass(const Value *V) {
- if (const CallInst *CI = dyn_cast<CallInst>(V)) {
- if (const Function *F = CI->getCalledFunction())
- return GetFunctionClass(F);
- // Otherwise, be conservative.
- return IC_CallOrUser;
- }
-
- // Otherwise, be conservative.
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
-}
-
-/// IsRetain - Test if the given class is objc_retain or
-/// equivalent.
-static bool IsRetain(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV;
-}
-
-/// IsAutorelease - Test if the given class is objc_autorelease or
-/// equivalent.
-static bool IsAutorelease(InstructionClass Class) {
- return Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
-/// IsForwarding - Test if the given class represents instructions which return
-/// their argument verbatim.
-static bool IsForwarding(InstructionClass Class) {
- // objc_retainBlock technically doesn't always return its argument
- // verbatim, but it doesn't matter for our purposes here.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock ||
- Class == IC_NoopCast;
-}
-
-/// IsNoopOnNull - Test if the given class represents instructions which do
-/// nothing if passed a null pointer.
-static bool IsNoopOnNull(InstructionClass Class) {
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_RetainBlock;
-}
-
-/// IsAlwaysTail - Test if the given class represents instructions which are
-/// always safe to mark with the "tail" keyword.
-static bool IsAlwaysTail(InstructionClass Class) {
- // IC_RetainBlock may be given a stack argument.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV;
-}
-
-/// IsNoThrow - Test if the given class represents instructions which are always
-/// safe to mark with the nounwind attribute..
-static bool IsNoThrow(InstructionClass Class) {
- // objc_retainBlock is not nounwind because it calls user copy constructors
- // which could theoretically throw.
- return Class == IC_Retain ||
- Class == IC_RetainRV ||
- Class == IC_Release ||
- Class == IC_Autorelease ||
- Class == IC_AutoreleaseRV ||
- Class == IC_AutoreleasepoolPush ||
- Class == IC_AutoreleasepoolPop;
-}
-
-/// EraseInstruction - Erase the given instruction. Many ObjC calls return their
-/// argument verbatim, so if it's such a call and the return value has users,
-/// replace them with the argument value.
-static void EraseInstruction(Instruction *CI) {
- Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
-
- bool Unused = CI->use_empty();
-
- if (!Unused) {
- // Replace the return value with the argument.
- assert(IsForwarding(GetBasicInstructionClass(CI)) &&
- "Can't delete non-forwarding instruction with users!");
- CI->replaceAllUsesWith(OldArg);
- }
-
- CI->eraseFromParent();
-
- if (Unused)
- RecursivelyDeleteTriviallyDeadInstructions(OldArg);
-}
-
-/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
-/// also knows how to look through objc_retain and objc_autorelease calls, which
-/// we know to return their argument verbatim.
-static const Value *GetUnderlyingObjCPtr(const Value *V) {
- for (;;) {
- V = GetUnderlyingObject(V);
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
-
- return V;
-}
-
-/// StripPointerCastsAndObjCCalls - This is a wrapper around
-/// Value::stripPointerCasts which also knows how to look through objc_retain
-/// and objc_autorelease calls, which we know to return their argument verbatim.
-static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// StripPointerCastsAndObjCCalls - This is a wrapper around
-/// Value::stripPointerCasts which also knows how to look through objc_retain
-/// and objc_autorelease calls, which we know to return their argument verbatim.
-static Value *StripPointerCastsAndObjCCalls(Value *V) {
- for (;;) {
- V = V->stripPointerCasts();
- if (!IsForwarding(GetBasicInstructionClass(V)))
- break;
- V = cast<CallInst>(V)->getArgOperand(0);
- }
- return V;
-}
-
-/// GetObjCArg - Assuming the given instruction is one of the special calls such
-/// as objc_retain or objc_release, return the argument value, stripped of no-op
-/// casts and forwarding calls.
-static Value *GetObjCArg(Value *Inst) {
- return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
-}
-
-/// IsObjCIdentifiedObject - This is similar to AliasAnalysis'
-/// isObjCIdentifiedObject, except that it uses special knowledge of
-/// ObjC conventions...
-static bool IsObjCIdentifiedObject(const Value *V) {
- // Assume that call results and arguments have their own "provenance".
- // Constants (including GlobalVariables) and Allocas are never
- // reference-counted.
- if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
- isa<Argument>(V) || isa<Constant>(V) ||
- isa<AllocaInst>(V))
- return true;
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
- const Value *Pointer =
- StripPointerCastsAndObjCCalls(LI->getPointerOperand());
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (GV->isConstant())
- return true;
- StringRef Name = GV->getName();
- // These special variables are known to hold values which are not
- // reference-counted pointers.
- if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
- Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
- Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
- Name.startswith("\01l_objc_msgSend_fixup_"))
- return true;
- }
- }
-
- return false;
-}
-
-/// FindSingleUseIdentifiedObject - This is similar to
-/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
-/// with multiple uses.
-static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
- if (Arg->hasOneUse()) {
- if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
- return FindSingleUseIdentifiedObject(BC->getOperand(0));
- if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
- if (GEP->hasAllZeroIndices())
- return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
- if (IsForwarding(GetBasicInstructionClass(Arg)))
- return FindSingleUseIdentifiedObject(
- cast<CallInst>(Arg)->getArgOperand(0));
- if (!IsObjCIdentifiedObject(Arg))
- return 0;
- return Arg;
- }
-
- // If we found an identifiable object but it has multiple uses, but they are
- // trivial uses, we can still consider this to be a single-use value.
- if (IsObjCIdentifiedObject(Arg)) {
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ++UI) {
- const User *U = *UI;
- if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
- return 0;
- }
-
- return Arg;
- }
-
- return 0;
-}
-
-/// ModuleHasARC - Test if the given module looks interesting to run ARC
-/// optimization on.
-static bool ModuleHasARC(const Module &M) {
- return
- M.getNamedValue("objc_retain") ||
- M.getNamedValue("objc_release") ||
- M.getNamedValue("objc_autorelease") ||
- M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
- M.getNamedValue("objc_retainBlock") ||
- M.getNamedValue("objc_autoreleaseReturnValue") ||
- M.getNamedValue("objc_autoreleasePoolPush") ||
- M.getNamedValue("objc_loadWeakRetained") ||
- M.getNamedValue("objc_loadWeak") ||
- M.getNamedValue("objc_destroyWeak") ||
- M.getNamedValue("objc_storeWeak") ||
- M.getNamedValue("objc_initWeak") ||
- M.getNamedValue("objc_moveWeak") ||
- M.getNamedValue("objc_copyWeak") ||
- M.getNamedValue("objc_retainedObject") ||
- M.getNamedValue("objc_unretainedObject") ||
- M.getNamedValue("objc_unretainedPointer");
-}
-
-/// DoesObjCBlockEscape - Test whether the given pointer, which is an
-/// Objective C block pointer, does not "escape". This differs from regular
-/// escape analysis in that a use as an argument to a call is not considered
-/// an escape.
-static bool DoesObjCBlockEscape(const Value *BlockPtr) {
- // Walk the def-use chains.
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(BlockPtr);
- do {
- const Value *V = Worklist.pop_back_val();
- for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
- UI != UE; ++UI) {
- const User *UUser = *UI;
- // Special - Use by a call (callee or argument) is not considered
- // to be an escape.
- switch (GetBasicInstructionClass(UUser)) {
- case IC_StoreWeak:
- case IC_InitWeak:
- case IC_StoreStrong:
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- // These special functions make copies of their pointer arguments.
- return true;
- case IC_User:
- case IC_None:
- // Use by an instruction which copies the value is an escape if the
- // result is an escape.
- if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
- isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
- Worklist.push_back(UUser);
- continue;
- }
- // Use by a load is not an escape.
- if (isa<LoadInst>(UUser))
- continue;
- // Use by a store is not an escape if the use is the address.
- if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
- if (V != SI->getValueOperand())
- continue;
- break;
- default:
- // Regular calls and other stuff are not considered escapes.
- continue;
- }
- // Otherwise, conservatively assume an escape.
- return true;
- }
- } while (!Worklist.empty());
-
- // No escapes found.
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// ARC AliasAnalysis.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Pass.h"
-
-namespace {
- /// ObjCARCAliasAnalysis - This is a simple alias analysis
- /// implementation that uses knowledge of ARC constructs to answer queries.
- ///
- /// TODO: This class could be generalized to know about other ObjC-specific
- /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing
- /// even though their offsets are dynamic.
- class ObjCARCAliasAnalysis : public ImmutablePass,
- public AliasAnalysis {
- public:
- static char ID; // Class identification, replacement for typeinfo
- ObjCARCAliasAnalysis() : ImmutablePass(ID) {
- initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
- }
-
- private:
- virtual void initializePass() {
- InitializeAliasAnalysis(this);
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const void *PI) {
- if (PI == &AliasAnalysis::ID)
- return static_cast<AliasAnalysis *>(this);
- return this;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual AliasResult alias(const Location &LocA, const Location &LocB);
- virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
- virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
- virtual ModRefBehavior getModRefBehavior(const Function *F);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
- const Location &Loc);
- virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2);
- };
-} // End of anonymous namespace
-
-// Register this pass...
-char ObjCARCAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
- "ObjC-ARC-Based Alias Analysis", false, true, false)
-
-ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
- return new ObjCARCAliasAnalysis();
-}
-
-/// getAnalysisUsage - Mark every analysis as preserved, then forward to the
-/// base AliasAnalysis implementation.
-void ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AliasAnalysis::getAnalysisUsage(AU);
-}
-
-/// alias - Answer an alias query, looking through ObjC-specific no-ops.
-/// Falls back to MayAlias when neither the precise nor the imprecise query
-/// gives a definite answer.
-AliasAnalysis::AliasResult
-ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
-  // With ARC optimizations disabled, just chain.
-  if (!EnableARCOpts)
-    return AliasAnalysis::alias(LocA, LocB);
-
-  // Precise query: strip no-ops (including ObjC-specific ones) but keep the
-  // original sizes and TBAA tags.
-  const Value *StrippedA = StripPointerCastsAndObjCCalls(LocA.Ptr);
-  const Value *StrippedB = StripPointerCastsAndObjCCalls(LocB.Ptr);
-  const Location PreciseA(StrippedA, LocA.Size, LocA.TBAATag);
-  const Location PreciseB(StrippedB, LocB.Size, LocB.TBAATag);
-  const AliasResult Precise = AliasAnalysis::alias(PreciseA, PreciseB);
-  if (Precise != MayAlias)
-    return Precise;
-
-  // Imprecise query: climb to the underlying objects, again looking through
-  // ObjC-specific no-ops. It is only worth asking if climbing moved at least
-  // one of the pointers.
-  const Value *BaseA = GetUnderlyingObjCPtr(StrippedA);
-  const Value *BaseB = GetUnderlyingObjCPtr(StrippedB);
-  const bool Climbed = BaseA != StrippedA || BaseB != StrippedB;
-  // MustAlias and PartialAlias can't be trusted here because
-  // GetUnderlyingObjCPtr may return an offsetted pointer value; only NoAlias
-  // is usable.
-  if (Climbed &&
-      AliasAnalysis::alias(Location(BaseA), Location(BaseB)) == NoAlias)
-    return NoAlias;
-
-  // Nothing more to try. Chaining is already covered by the precise query.
-  return MayAlias;
-}
-
-/// pointsToConstantMemory - Answer a constant-memory query, looking through
-/// ObjC-specific no-ops.
-bool ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
-                                                  bool OrLocal) {
-  // With ARC optimizations disabled, just chain.
-  if (!EnableARCOpts)
-    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
-
-  // Precise query: strip no-ops (including ObjC-specific ones) but keep the
-  // original size and TBAA tag.
-  const Value *Stripped = StripPointerCastsAndObjCCalls(Loc.Ptr);
-  const Location Precise(Stripped, Loc.Size, Loc.TBAATag);
-  if (AliasAnalysis::pointsToConstantMemory(Precise, OrLocal))
-    return true;
-
-  // Imprecise query: climb to the underlying object. If climbing did not
-  // move the pointer there is nothing new to ask; chaining is already
-  // covered by the precise query.
-  const Value *Base = GetUnderlyingObjCPtr(Stripped);
-  if (Base == Stripped)
-    return false;
-  return AliasAnalysis::pointsToConstantMemory(Location(Base), OrLocal);
-}
-
-/// getModRefBehavior - Call-site form. Nothing ARC-specific is known here,
-/// so the query is chained to the next AliasAnalysis.
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
-  const ModRefBehavior Chained = AliasAnalysis::getModRefBehavior(CS);
-  return Chained;
-}
-
-/// getModRefBehavior - Function form. Functions classified as IC_NoopCast
-/// are reported as not accessing memory; everything else is chained.
-AliasAnalysis::ModRefBehavior
-ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
-  // The function is only classified when ARC optimizations are enabled.
-  if (EnableARCOpts && GetFunctionClass(F) == IC_NoopCast)
-    return DoesNotAccessMemory;
-
-  return AliasAnalysis::getModRefBehavior(F);
-}
-
-/// getModRefInfo - Call-site versus location form. A fixed set of ARC
-/// runtime calls is reported as NoModRef; everything else is chained.
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
-  if (!EnableARCOpts)
-    return AliasAnalysis::getModRefInfo(CS, Loc);
-
-  // These functions don't access any memory visible to the compiler.
-  // Note that this doesn't include objc_retainBlock, because it updates
-  // pointers when it copies block data.
-  bool AccessesNoVisibleMemory = false;
-  switch (GetBasicInstructionClass(CS.getInstruction())) {
-  case IC_Retain:
-  case IC_RetainRV:
-  case IC_Autorelease:
-  case IC_AutoreleaseRV:
-  case IC_NoopCast:
-  case IC_AutoreleasepoolPush:
-  case IC_FusedRetainAutorelease:
-  case IC_FusedRetainAutoreleaseRV:
-    AccessesNoVisibleMemory = true;
-    break;
-  default:
-    break;
-  }
-
-  if (AccessesNoVisibleMemory)
-    return NoModRef;
-  return AliasAnalysis::getModRefInfo(CS, Loc);
-}
-
-/// getModRefInfo - Call-site versus call-site form; currently always chained.
-///
-/// TODO: Theoretically we could check for dependencies between objc_* calls
-/// and OnlyAccessesArgumentPointees calls or other well-behaved calls.
-AliasAnalysis::ModRefResult
-ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
-                                    ImmutableCallSite CS2) {
-  const ModRefResult Chained = AliasAnalysis::getModRefInfo(CS1, CS2);
-  return Chained;
-}
-
-//===----------------------------------------------------------------------===//
-// ARC expansion.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Transforms/Scalar.h"
-
-namespace {
-  /// ObjCARCExpand - Function pass performing the early ARC transformations.
-  class ObjCARCExpand : public FunctionPass {
-  public:
-    static char ID;
-    ObjCARCExpand() : FunctionPass(ID) {
-      initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function &F);
-
-    /// Run - A flag indicating whether this optimization pass should run.
-    /// It is assigned in doInitialization and read in runOnFunction.
-    bool Run;
-  };
-}
-
-char ObjCARCExpand::ID = 0; // Definition of the pass ID handed to FunctionPass(ID).
-INITIALIZE_PASS(ObjCARCExpand,
- "objc-arc-expand", "ObjC ARC expansion", false, false) // Registers the pass under the name "objc-arc-expand".
-
-/// createObjCARCExpandPass - Return a newly allocated ObjCARCExpand pass.
-Pass *llvm::createObjCARCExpandPass() {
-  ObjCARCExpand *NewPass = new ObjCARCExpand();
-  return NewPass;
-}
-
-/// getAnalysisUsage - Declare that this pass preserves the CFG.
-void
-ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-}
-
-/// doInitialization - Remember whether the module uses ARC so that
-/// runOnFunction can bail out early. Always returns false.
-bool ObjCARCExpand::doInitialization(Module &M) {
-  const bool UsesARC = ModuleHasARC(M);
-  Run = UsesARC;
-  return false;
-}
-
-/// runOnFunction - Replace the uses of ARC calls that return their argument
-/// with the argument itself. Returns true if any such call was seen.
-bool ObjCARCExpand::runOnFunction(Function &F) {
-  // Nothing to do if ARC optimizations are disabled or if nothing in the
-  // Module uses ARC.
-  if (!EnableARCOpts || !Run)
-    return false;
-
-  bool MadeChange = false;
-
-  for (inst_iterator It = inst_begin(&F), End = inst_end(&F); It != End;
-       ++It) {
-    Instruction *Inst = &*It;
-
-    DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
-
-    switch (GetBasicInstructionClass(Inst)) {
-    default:
-      break;
-    case IC_Retain:
-    case IC_RetainRV:
-    case IC_Autorelease:
-    case IC_AutoreleaseRV:
-    case IC_FusedRetainAutorelease:
-    case IC_FusedRetainAutoreleaseRV: {
-      // These calls return their argument verbatim, as a low-level
-      // optimization. However, this makes high-level optimizations
-      // harder. Undo any uses of this optimization that the front-end
-      // emitted here. We'll redo them in the contract pass.
-      MadeChange = true;
-      llvm::Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
-      DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n"
-                      " New = " << *Arg << "\n");
-      Inst->replaceAllUsesWith(Arg);
-      break;
-    }
-    }
-  }
-
-  DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n");
-
-  return MadeChange;
-}
-
-//===----------------------------------------------------------------------===//
-// ARC autorelease pool elimination.
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/Constants.h"
-
-namespace {
-  /// ObjCARCAPElim - Module pass performing autorelease pool elimination.
-  class ObjCARCAPElim : public ModulePass {
-  public:
-    static char ID;
-    ObjCARCAPElim() : ModulePass(ID) {
-      initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool runOnModule(Module &M);
-
-    /// MayAutorelease - Whether calls made by CS can possibly autorelease.
-    /// Depth counts how many levels of callees have been entered so far.
-    static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0);
-
-    /// OptimizeBB - Remove matching push/pop pairs within one basic block.
-    static bool OptimizeBB(BasicBlock *BB);
-  };
-}
-
-char ObjCARCAPElim::ID = 0; // Definition of the pass ID handed to ModulePass(ID).
-INITIALIZE_PASS(ObjCARCAPElim,
- "objc-arc-apelim",
- "ObjC ARC autorelease pool elimination",
- false, false) // Registers the pass under the name "objc-arc-apelim".
-
-/// createObjCARCAPElimPass - Return a newly allocated ObjCARCAPElim pass.
-Pass *llvm::createObjCARCAPElimPass() {
-  ObjCARCAPElim *NewPass = new ObjCARCAPElim();
-  return NewPass;
-}
-
-/// getAnalysisUsage - Declare that this pass preserves the CFG.
-void
-ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-}
-
-/// MayAutorelease - Interprocedurally determine if calls made by the
-/// given call site can possibly produce autoreleases.
-///
-/// \param CS    The call site to examine.
-/// \param Depth How many levels of callees have already been entered.
-/// \returns false only if the callee's body is visible, cannot be
-/// overridden, and every call in it that may write memory was itself shown
-/// not to autorelease within the recursion limit; true otherwise.
-bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
-  const Function *Callee = CS.getCalledFunction();
-
-  // With no known callee, only a declaration, or a definition that may be
-  // overridden, there is no body to reason about: assume the worst.
-  if (!Callee || Callee->isDeclaration() || Callee->mayBeOverridden())
-    return true;
-
-  for (Function::const_iterator I = Callee->begin(), E = Callee->end();
-       I != E; ++I) {
-    const BasicBlock *BB = I;
-    for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
-         J != F; ++J)
-      if (ImmutableCallSite JCS = ImmutableCallSite(J)) {
-        // Calls that only read memory are skipped.
-        if (JCS.onlyReadsMemory())
-          continue;
-        // This recursion depth limit is arbitrary. It's just great
-        // enough to cover known interesting testcases. Once the limit is
-        // reached the nested call is not examined, so it must be assumed
-        // to autorelease rather than silently treated as harmless.
-        if (Depth >= 3 || MayAutorelease(JCS, Depth + 1))
-          return true;
-      }
-  }
-
-  return false;
-}
-
-/// OptimizeBB - Scan one basic block and erase autorelease pool push/pop
-/// pairs when the pop takes the push as its argument and no call between
-/// them may autorelease. Returns true if anything was erased.
-bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
-  bool MadeChange = false;
-
-  // The most recent push that has not yet been followed by a pop or by a
-  // call that may autorelease; null when there is none.
-  Instruction *PendingPush = 0;
-
-  BasicBlock::iterator It = BB->begin(), End = BB->end();
-  while (It != End) {
-    // Advance first so that erasing the current instruction is safe.
-    Instruction *Cur = It++;
-    switch (GetBasicInstructionClass(Cur)) {
-    case IC_AutoreleasepoolPush:
-      PendingPush = Cur;
-      break;
-    case IC_AutoreleasepoolPop: {
-      // If this pop matches a push and nothing in between can autorelease,
-      // zap the pair.
-      const bool MatchesPush =
-        PendingPush && cast<CallInst>(Cur)->getArgOperand(0) == PendingPush;
-      if (MatchesPush) {
-        MadeChange = true;
-        DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop autorelease pair:\n"
-                     << " Pop: " << *Cur << "\n"
-                     << " Push: " << *PendingPush << "\n");
-        Cur->eraseFromParent();
-        PendingPush->eraseFromParent();
-      }
-      // Matched or not, a pop ends the current candidate.
-      PendingPush = 0;
-      break;
-    }
-    case IC_CallOrUser:
-      if (MayAutorelease(ImmutableCallSite(Cur)))
-        PendingPush = 0;
-      break;
-    default:
-      break;
-    }
-  }
-
-  return MadeChange;
-}
-
-/// runOnModule - Try to remove unnecessary autorelease pool push/pop pairs
-/// from the single-block constructor functions listed in llvm.global_ctors.
-///
-/// \returns true if any function in the module was modified.
-bool ObjCARCAPElim::runOnModule(Module &M) {
-  if (!EnableARCOpts)
-    return false;
-
-  // If nothing in the Module uses ARC, don't do anything.
-  if (!ModuleHasARC(M))
-    return false;
-
-  // Find the llvm.global_ctors variable, as the first step in
-  // identifying the global constructors. In theory, unnecessary autorelease
-  // pools could occur anywhere, but in practice it's pretty rare. Global
-  // ctors are a place where autorelease pools get inserted automatically,
-  // so it's pretty common for them to be unnecessary, and it's pretty
-  // profitable to eliminate them.
-  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
-  if (!GV)
-    return false;
-
-  assert(GV->hasDefinitiveInitializer() &&
-         "llvm.global_ctors is uncooperative!");
-
-  // Dig the constructor functions out of GV's initializer. The initializer
-  // is not necessarily a ConstantArray (an empty or all-zero list can be a
-  // zeroinitializer), in which case there are no constructors to look at.
-  ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (!Init)
-    return false;
-
-  bool Changed = false;
-
-  for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
-       OI != OE; ++OI) {
-    Value *Op = *OI;
-    // llvm.global_ctors is an array of pairs where the second members
-    // are constructor functions. Skip entries that are not spelled as a
-    // ConstantStruct (for instance a zeroinitializer element).
-    ConstantStruct *Entry = dyn_cast<ConstantStruct>(Op);
-    if (!Entry)
-      continue;
-    Function *F = dyn_cast<Function>(Entry->getOperand(1));
-    // If the user used a constructor function with the wrong signature and
-    // it got bitcasted or whatever, look the other way.
-    if (!F)
-      continue;
-    // Only look at function definitions.
-    if (F->isDeclaration())
-      continue;
-    // Only look at functions with one basic block.
-    if (llvm::next(F->begin()) != F->end())
-      continue;
-    // Ok, a single-block constructor function definition. Try to optimize it.
-    Changed |= OptimizeBB(F->begin());
-  }
-
-  return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// ARC optimization.
-//===----------------------------------------------------------------------===//
-
-// TODO: On code like this:
-//
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-// stuff_that_cannot_release()
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-//
-// The second retain and autorelease can be deleted.
-
-// TODO: It should be possible to delete
-// objc_autoreleasePoolPush and objc_autoreleasePoolPop
-// pairs if nothing is actually autoreleased between them. Also, autorelease
-// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
-// after inlining) can be turned into plain release calls.
-
-// TODO: Critical-edge splitting. If the optimal insertion point is
-// a critical edge, the current algorithm has to fail, because it doesn't
-// know how to split edges. It should be possible to make the optimizer
-// think in terms of edges, rather than blocks, and then split critical
-// edges on demand.
-
-// TODO: OptimizeSequences could be generalized to be interprocedural.
-
-// TODO: Recognize that a bunch of other objc runtime calls have
-// non-escaping arguments and non-releasing arguments, and may be
-// non-autoreleasing.
-
-// TODO: Sink autorelease calls as far as possible. Unfortunately we
-// usually can't sink them past other calls, which would be the main
-// case where it would be useful.
-
-// TODO: The pointer returned from objc_loadWeakRetained is retained.
-
-// TODO: Delete release+retain pairs (rare).
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/CFG.h"
-
-// Statistics counters for the ARC optimizer; each string is the description
-// attached to the counter.
-STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
-STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
-STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
-STATISTIC(NumRets, "Number of return value forwarding "
-                   "retain+autoreleases eliminated");
-STATISTIC(NumRRs, "Number of retain+release paths eliminated");
-STATISTIC(NumPeeps, "Number of calls peephole-optimized");
-
-namespace {
-  /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
-  /// uses many of the same techniques, except it uses special ObjC-specific
-  /// reasoning about pointer relationships.
-  ///
-  /// Results of related() are cached per pair of values until clear() is
-  /// called. The class is neither copyable nor assignable.
-  class ProvenanceAnalysis {
-    /// AA - The alias analysis consulted for a first approximation. Null
-    /// until setAA() has been called.
-    AliasAnalysis *AA;
-
-    typedef std::pair<const Value *, const Value *> ValuePairTy;
-    typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
-
-    /// CachedResults - Answers already computed by related(), keyed by the
-    /// ordered pair of queried values.
-    CachedResultsTy CachedResults;
-
-    bool relatedCheck(const Value *A, const Value *B);
-    bool relatedSelect(const SelectInst *A, const Value *B);
-    bool relatedPHI(const PHINode *A, const Value *B);
-
-    void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
-    ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION;
-
-  public:
-    // Start with a null AA so that getAA() never returns an indeterminate
-    // pointer before setAA() is called.
-    ProvenanceAnalysis() : AA(0) {}
-
-    void setAA(AliasAnalysis *aa) { AA = aa; }
-
-    AliasAnalysis *getAA() const { return AA; }
-
-    /// related - Cached query: may A and B be related? Defined out of line.
-    bool related(const Value *A, const Value *B);
-
-    /// clear - Drop all cached results.
-    void clear() {
-      CachedResults.clear();
-    }
-  };
-}
-
-/// relatedSelect - Test whether either arm of the select A may be related
-/// to B.
-bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
-  // When B is also a select on the same condition, a more precise check is
-  // possible: only the values on corresponding arms need to be compared.
-  if (const SelectInst *SB = dyn_cast<SelectInst>(B)) {
-    if (A->getCondition() == SB->getCondition()) {
-      if (related(A->getTrueValue(), SB->getTrueValue()))
-        return true;
-      return related(A->getFalseValue(), SB->getFalseValue());
-    }
-  }
-
-  // Otherwise check each arm of A against B on its own.
-  if (related(A->getTrueValue(), B))
-    return true;
-  return related(A->getFalseValue(), B);
-}
-
-/// relatedPHI - Test whether any incoming value of the PHI node A may be
-/// related to B.
-bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
-  const unsigned NumIncoming = A->getNumIncomingValues();
-
-  // When B is a PHI in the same block, a check that is both more precise and
-  // more efficient is possible: compare only the values arriving over
-  // corresponding edges.
-  if (const PHINode *PNB = dyn_cast<PHINode>(B)) {
-    if (PNB->getParent() == A->getParent()) {
-      for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
-        const Value *FromA = A->getIncomingValue(Idx);
-        const Value *FromB =
-          PNB->getIncomingValueForBlock(A->getIncomingBlock(Idx));
-        if (related(FromA, FromB))
-          return true;
-      }
-      return false;
-    }
-  }
-
-  // Otherwise check each distinct incoming value of A against B once.
-  SmallPtrSet<const Value *, 4> Seen;
-  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
-    const Value *Incoming = A->getIncomingValue(Idx);
-    if (!Seen.insert(Incoming))
-      continue;
-    if (related(Incoming, B))
-      return true;
-  }
-
-  // None of the incoming values is related to B.
-  return false;
-}
-
-/// isStoredObjCPointer - Test if the value of P, or any value covered by its
-/// provenance, is ever stored within the function (not counting callees).
-static bool isStoredObjCPointer(const Value *P) {
-  // Worklist walk over P and the transitive users of P.
-  SmallPtrSet<const Value *, 8> Visited;
-  SmallVector<const Value *, 8> Worklist;
-  Visited.insert(P);
-  Worklist.push_back(P);
-
-  while (!Worklist.empty()) {
-    const Value *Cur = Worklist.pop_back_val();
-    for (Value::const_use_iterator UI = Cur->use_begin(), UE = Cur->use_end();
-         UI != UE; ++UI) {
-      const User *Ur = *UI;
-
-      if (isa<StoreInst>(Ur)) {
-        // Operand 0 of a store is the value being stored, so the pointer
-        // itself is stored.
-        if (UI.getOperandNo() == 0)
-          return true;
-        // Otherwise the pointer is only stored through.
-        continue;
-      }
-
-      // The pointer is passed as an argument, ignore this.
-      if (isa<CallInst>(Ur))
-        continue;
-
-      // Assume the worst.
-      // NOTE(review): this tests the value whose uses are being walked, not
-      // the user Ur; confirm that is intended.
-      if (isa<PtrToIntInst>(Cur))
-        return true;
-
-      // Follow any other user the first time it is seen.
-      if (Visited.insert(Ur))
-        Worklist.push_back(Ur);
-    }
-  }
-
-  // Everything checked out.
-  return false;
-}
-
-/// relatedCheck - The uncached worker behind related(). Returns false only
-/// when A and B can be shown to be unrelated; the default answer is true.
-bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
-  // Skip past provenance pass-throughs.
-  A = GetUnderlyingObjCPtr(A);
-  B = GetUnderlyingObjCPtr(B);
-
-  // Quick check.
-  if (A == B)
-    return true;
-
-  // Ask regular AliasAnalysis, for a first approximation. Only MayAlias
-  // leaves the question open.
-  const AliasAnalysis::AliasResult FirstGuess = AA->alias(A, B);
-  if (FirstGuess == AliasAnalysis::NoAlias)
-    return false;
-  if (FirstGuess == AliasAnalysis::MustAlias ||
-      FirstGuess == AliasAnalysis::PartialAlias)
-    return true;
-
-  const bool AIsIdentified = IsObjCIdentifiedObject(A);
-  const bool BIsIdentified = IsObjCIdentifiedObject(B);
-
-  // An ObjC-Identified object can't alias a load if it is never locally
-  // stored, so in that pairing the answer is whether there is an obvious
-  // escape.
-  if (AIsIdentified && isa<LoadInst>(B))
-    return isStoredObjCPointer(A);
-  if (BIsIdentified && isa<LoadInst>(A))
-    return isStoredObjCPointer(B);
-
-  // Both pointers are identified and escapes aren't an evident problem.
-  if (AIsIdentified && BIsIdentified)
-    return false;
-
-  // Special handling for PHI and Select.
-  if (const PHINode *PN = dyn_cast<PHINode>(A))
-    return relatedPHI(PN, B);
-  if (const PHINode *PN = dyn_cast<PHINode>(B))
-    return relatedPHI(PN, A);
-  if (const SelectInst *S = dyn_cast<SelectInst>(A))
-    return relatedSelect(S, B);
-  if (const SelectInst *S = dyn_cast<SelectInst>(B))
-    return relatedSelect(S, A);
-
-  // Conservative.
-  return true;
-}
-
-/// related - Cached front end for relatedCheck. The pair is put into a
-/// fixed order first, so (A, B) and (B, A) share one cache entry.
-bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
-  if (A > B) std::swap(A, B);
-  const ValuePairTy Key(A, B);
-
-  // Seed the cache with the conservative answer. A failed insertion means
-  // the answer is already known (or is being computed further up the call
-  // stack); a successful one guards against recursive queries until the
-  // real answer is stored below.
-  std::pair<CachedResultsTy::iterator, bool> Inserted =
-    CachedResults.insert(std::make_pair(Key, true));
-  if (!Inserted.second)
-    return Inserted.first->second;
-
-  const bool Answer = relatedCheck(A, B);
-  // relatedCheck can call back into related() and add entries, so the key
-  // is looked up again here instead of reusing the iterator from above.
-  CachedResults[Key] = Answer;
-  return Answer;
-}
-
-namespace {
- // Sequence - A sequence of states that a pointer may go through in which an
- // objc_retain and objc_release are actually needed. MergeSeqs compares these enumerators with '>', so their declaration order is significant.
- enum Sequence {
- S_None, ///< not currently in a sequence
- S_Retain, ///< objc_retain(x)
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
- S_Use, ///< any use of x
- S_Stop, ///< like S_Release, but code motion is stopped
- S_Release, ///< objc_release(x)
- S_MovableRelease ///< objc_release(x), !clang.imprecise_release
- };
-}
-
-/// MergeSeqs - Combine the sequence states A and B reached along two paths.
-/// TopDown selects the top-down merge rules; otherwise the bottom-up rules
-/// are used. S_None is returned whenever the states cannot be reconciled.
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
-  // The easy cases.
-  if (A == B)
-    return A;
-  if (A == S_None || B == S_None)
-    return S_None;
-
-  // Order the pair so that Lo is the smaller enumerator.
-  Sequence Lo = A, Hi = B;
-  if (Lo > Hi) std::swap(Lo, Hi);
-
-  if (TopDown) {
-    // Choose the side which is further along in the sequence.
-    const bool LoIsEarly = Lo == S_Retain || Lo == S_CanRelease;
-    const bool HiIsLater = Hi == S_CanRelease || Hi == S_Use;
-    return (LoIsEarly && HiIsLater) ? Hi : S_None;
-  }
-
-  const bool HiIsAnyRelease = Hi == S_Release || Hi == S_MovableRelease;
-
-  // Choose the side which is further along in the sequence.
-  const bool LoIsUseLike = Lo == S_Use || Lo == S_CanRelease;
-  if (LoIsUseLike && (Hi == S_Use || Hi == S_Stop || HiIsAnyRelease))
-    return Lo;
-
-  // If both sides are releases, choose the more conservative one.
-  if (Lo == S_Stop && HiIsAnyRelease)
-    return Lo;
-  if (Lo == S_Release && Hi == S_MovableRelease)
-    return Lo;
-
-  return S_None;
-}
-
-namespace {
- /// RRInfo - Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// KnownSafe - After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the known same pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
- /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
- /// opposed to objc_retain calls).
- bool IsRetainBlock;
-
- /// IsTailCallRelease - True if the objc_release calls are all marked
- /// with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// ReleaseMetadata - If the Calls are objc_release calls and they all have
- /// a clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// Calls - For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// ReverseInsertPts - The set of optimal insert positions for
- /// moving calls in the opposite sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- RRInfo() :
- KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false),
- ReleaseMetadata(0) {}
-
- void clear(); // Resets every field to the state the constructor establishes.
- };
-}
-
-/// clear - Return this RRInfo to its freshly constructed state: empty sets,
-/// cleared flags, and no release metadata.
-void RRInfo::clear() {
-  // Sets first, then the scalar fields; the assignments are independent.
-  Calls.clear();
-  ReverseInsertPts.clear();
-  ReleaseMetadata = 0;
-  KnownSafe = false;
-  IsRetainBlock = false;
-  IsTailCallRelease = false;
-}
-
-namespace {
- /// PtrState - This class summarizes several per-pointer runtime properties
- /// which are propagated through the flow graph.
- class PtrState {
- /// KnownPositiveRefCount - True if the reference count is known to
- /// be incremented.
- bool KnownPositiveRefCount;
-
- /// Partial - True if we've seen an opportunity for partial RR elimination,
- /// such as pushing calls into a CFG triangle or into one side of a
- /// CFG diamond.
- bool Partial;
-
- /// Seq - The current position in the sequence.
- Sequence Seq : 8;
-
- public:
- /// RRI - Unidirectional information about the current sequence.
- /// TODO: Encapsulate this better.
- RRInfo RRI;
-
- PtrState() : KnownPositiveRefCount(false), Partial(false),
- Seq(S_None) {}
-
- void SetKnownPositiveRefCount() {
- KnownPositiveRefCount = true;
- }
-
- void ClearRefCount() {
- KnownPositiveRefCount = false;
- }
-
- bool IsKnownIncremented() const {
- return KnownPositiveRefCount;
- }
-
- void SetSeq(Sequence NewSeq) { // Changes only Seq; Partial and RRI are left untouched.
- Seq = NewSeq;
- }
-
- Sequence GetSeq() const {
- return Seq;
- }
-
- void ClearSequenceProgress() { // Same as ResetSequenceProgress(S_None).
- ResetSequenceProgress(S_None);
- }
-
- void ResetSequenceProgress(Sequence NewSeq) { // Sets Seq and also drops Partial and all RRI state.
- Seq = NewSeq;
- Partial = false;
- RRI.clear();
- }
-
- void Merge(const PtrState &Other, bool TopDown); // Merges Other into this state; TopDown selects the MergeSeqs rules.
- };
-}
-
-/// Merge - Fold the state reached along another path (Other) into this one.
-/// TopDown selects which set of MergeSeqs rules applies.
-void PtrState::Merge(const PtrState &Other, bool TopDown) {
-  // The count is only known positive if it is on both paths.
-  KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
-
-  // We can't merge a plain objc_retain with an objc_retainBlock.
-  if (RRI.IsRetainBlock == Other.RRI.IsRetainBlock)
-    Seq = MergeSeqs(Seq, Other.Seq, TopDown);
-  else
-    Seq = S_None;
-
-  // Drop all associated state if we're not in a sequence (anymore), or if
-  // either path has previously seen a partial merge. In the latter case the
-  // sequence is dropped conservatively to avoid doing partial RR
-  // elimination: if the branch predicates for the two merges differ, mixing
-  // them is unsafe.
-  if (Seq == S_None || Partial || Other.Partial) {
-    ClearSequenceProgress();
-    return;
-  }
-
-  // Conservatively merge the ReleaseMetadata information.
-  if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
-    RRI.ReleaseMetadata = 0;
-
-  RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
-  RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
-                          Other.RRI.IsTailCallRelease;
-  RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
-
-  // Merge the insert point sets. Any difference between them, in size or in
-  // membership, makes this a partial merge.
-  typedef SmallPtrSet<Instruction *, 2> InsertPtSetTy;
-  const InsertPtSetTy &Theirs = Other.RRI.ReverseInsertPts;
-  bool SetsDiffer = RRI.ReverseInsertPts.size() != Theirs.size();
-  for (InsertPtSetTy::const_iterator I = Theirs.begin(), E = Theirs.end();
-       I != E; ++I)
-    if (RRI.ReverseInsertPts.insert(*I))
-      SetsDiffer = true;
-  Partial = SetsDiffer;
-}
-
-namespace {
- /// BBState - Per-BasicBlock state: path counts, per-pointer states for both traversal directions, and the block's effective CFG edges.
- class BBState {
- /// TopDownPathCount - The number of unique control paths from the entry
- /// which can reach this block.
- unsigned TopDownPathCount;
-
- /// BottomUpPathCount - The number of unique control paths to exits
- /// from this block.
- unsigned BottomUpPathCount;
-
- /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
- typedef MapVector<const Value *, PtrState> MapTy;
-
- /// PerPtrTopDown - The top-down traversal uses this to record information
- /// known about a pointer at the bottom of each block.
- MapTy PerPtrTopDown;
-
- /// PerPtrBottomUp - The bottom-up traversal uses this to record information
- /// known about a pointer at the top of each block.
- MapTy PerPtrBottomUp;
-
- /// Preds, Succs - Effective successors and predecessors of the current
- /// block (this ignores ignorable edges and ignored backedges).
- SmallVector<BasicBlock *, 2> Preds;
- SmallVector<BasicBlock *, 2> Succs;
-
- public:
- BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
-
- typedef MapTy::iterator ptr_iterator;
- typedef MapTy::const_iterator ptr_const_iterator;
-
- ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
- ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
- ptr_const_iterator top_down_ptr_begin() const {
- return PerPtrTopDown.begin();
- }
- ptr_const_iterator top_down_ptr_end() const {
- return PerPtrTopDown.end();
- }
-
- ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
- ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
- ptr_const_iterator bottom_up_ptr_begin() const {
- return PerPtrBottomUp.begin();
- }
- ptr_const_iterator bottom_up_ptr_end() const {
- return PerPtrBottomUp.end();
- }
-
- /// SetAsEntry - Mark this block as being an entry block, which has one
- /// path from the entry by definition.
- void SetAsEntry() { TopDownPathCount = 1; }
-
- /// SetAsExit - Mark this block as being an exit block, which has one
- /// path to an exit by definition.
- void SetAsExit() { BottomUpPathCount = 1; }
-
- PtrState &getPtrTopDownState(const Value *Arg) { // Indexes the map with operator[].
- return PerPtrTopDown[Arg];
- }
-
- PtrState &getPtrBottomUpState(const Value *Arg) { // Indexes the map with operator[].
- return PerPtrBottomUp[Arg];
- }
-
- void clearBottomUpPointers() {
- PerPtrBottomUp.clear();
- }
-
- void clearTopDownPointers() {
- PerPtrTopDown.clear();
- }
-
- void InitFromPred(const BBState &Other); // Copies Other's top-down map and path count.
- void InitFromSucc(const BBState &Other); // Copies Other's bottom-up map and path count.
- void MergePred(const BBState &Other); // Merges Other's top-down state into this block.
- void MergeSucc(const BBState &Other); // Merges Other's bottom-up state into this block.
-
- /// GetAllPathCount - Return the number of possible unique paths from an
- /// entry to an exit which pass through this block. This is only valid
- /// after both the top-down and bottom-up traversals are complete.
- unsigned GetAllPathCount() const {
- assert(TopDownPathCount != 0);
- assert(BottomUpPathCount != 0);
- return TopDownPathCount * BottomUpPathCount;
- }
-
- // Specialized CFG utilities.
- typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator;
- edge_iterator pred_begin() { return Preds.begin(); }
- edge_iterator pred_end() { return Preds.end(); }
- edge_iterator succ_begin() { return Succs.begin(); }
- edge_iterator succ_end() { return Succs.end(); }
-
- void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
- void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
-
- bool isExit() const { return Succs.empty(); } // A block with no recorded successors.
- };
-}
-
-/// InitFromPred - Start this block's top-down state as a copy of Other's:
-/// both the path count and the per-pointer map.
-void BBState::InitFromPred(const BBState &Other) {
-  TopDownPathCount = Other.TopDownPathCount;
-  PerPtrTopDown = Other.PerPtrTopDown;
-}
-
-/// InitFromSucc - Start this block's bottom-up state as a copy of Other's:
-/// both the path count and the per-pointer map.
-void BBState::InitFromSucc(const BBState &Other) {
-  BottomUpPathCount = Other.BottomUpPathCount;
-  PerPtrBottomUp = Other.PerPtrBottomUp;
-}
-
-/// MergePred - The top-down traversal uses this to merge information about
-/// predecessors to form the initial state for a new block.
-void BBState::MergePred(const BBState &Other) {
-  // Other.TopDownPathCount can be 0, in which case it is either dead or a
-  // loop backedge. Loop backedges are special.
-  TopDownPathCount += Other.TopDownPathCount;
-
-  // If the unsigned addition wrapped around, fall back to conservative
-  // behavior: forget everything known top-down.
-  if (TopDownPathCount < Other.TopDownPathCount) {
-    clearTopDownPointers();
-    return;
-  }
-
-  // Walk the other set. A key we already have is merged with the other
-  // entry; a key we lack is copied in and then merged with an empty entry.
-  for (ptr_const_iterator Theirs = Other.top_down_ptr_begin(),
-       TheirsEnd = Other.top_down_ptr_end(); Theirs != TheirsEnd; ++Theirs) {
-    std::pair<ptr_iterator, bool> Slot = PerPtrTopDown.insert(*Theirs);
-    PtrState &Mine = Slot.first->second;
-    if (Slot.second)
-      Mine.Merge(PtrState(), /*TopDown=*/true);
-    else
-      Mine.Merge(Theirs->second, /*TopDown=*/true);
-  }
-
-  // Walk our set. Any key the other set lacks is forced to merge with an
-  // empty entry.
-  for (ptr_iterator Ours = top_down_ptr_begin(),
-       OursEnd = top_down_ptr_end(); Ours != OursEnd; ++Ours) {
-    if (Other.PerPtrTopDown.find(Ours->first) != Other.PerPtrTopDown.end())
-      continue;
-    Ours->second.Merge(PtrState(), /*TopDown=*/true);
-  }
-}
-
-/// MergeSucc - The bottom-up traversal uses this to merge information about
-/// successors to form the initial state for a new block.
-void BBState::MergeSucc(const BBState &Other) {
-  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
-  // loop backedge. Loop backedges are special.
-  BottomUpPathCount += Other.BottomUpPathCount;
-
-  // If the unsigned addition wrapped around, fall back to conservative
-  // behavior: forget everything known bottom-up.
-  if (BottomUpPathCount < Other.BottomUpPathCount) {
-    clearBottomUpPointers();
-    return;
-  }
-
-  // Walk the other set. A key we already have is merged with the other
-  // entry; a key we lack is copied in and then merged with an empty entry.
-  for (ptr_const_iterator Theirs = Other.bottom_up_ptr_begin(),
-       TheirsEnd = Other.bottom_up_ptr_end(); Theirs != TheirsEnd; ++Theirs) {
-    std::pair<ptr_iterator, bool> Slot = PerPtrBottomUp.insert(*Theirs);
-    PtrState &Mine = Slot.first->second;
-    if (Slot.second)
-      Mine.Merge(PtrState(), /*TopDown=*/false);
-    else
-      Mine.Merge(Theirs->second, /*TopDown=*/false);
-  }
-
-  // Walk our set. Any key the other set lacks is forced to merge with an
-  // empty entry.
-  for (ptr_iterator Ours = bottom_up_ptr_begin(),
-       OursEnd = bottom_up_ptr_end(); Ours != OursEnd; ++Ours) {
-    if (Other.PerPtrBottomUp.find(Ours->first) != Other.PerPtrBottomUp.end())
-      continue;
-    Ours->second.Merge(PtrState(), /*TopDown=*/false);
-  }
-}
-
-namespace {
- /// ObjCARCOpt - The main ARC optimization pass, implemented as a FunctionPass.
- class ObjCARCOpt : public FunctionPass {
- bool Changed;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// RetainRVCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
-
- /// UsedInThisFunction - Flags which determine whether each of the
- /// interesting runtime functions is in fact used in the current function.
- unsigned UsedInThisFunction;
-
- /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
- /// metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
- /// metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// NoObjCARCExceptionsMDKind - The Metadata Kind for
- /// clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind;
-
- Constant *getRetainRVCallee(Module *M);
- Constant *getAutoreleaseRVCallee(Module *M);
- Constant *getReleaseCallee(Module *M);
- Constant *getRetainCallee(Module *M);
- Constant *getRetainBlockCallee(Module *M);
- Constant *getAutoreleaseCallee(Module *M);
-
- bool IsRetainBlockOptimizable(const Instruction *Inst);
-
- void OptimizeRetainCall(Function &F, Instruction *Retain);
- bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
- void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
- void OptimizeIndividualCalls(Function &F);
-
- void CheckForCFGHazards(const BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates);
- bool VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains);
- bool VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates);
- bool VisitTopDown(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases);
-
- void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M);
-
- bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M);
-
- void OptimizeWeakCalls(Function &F);
-
- bool OptimizeSequences(Function &F);
-
- void OptimizeReturns(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual void releaseMemory();
-
- public:
- static char ID;
- ObjCARCOpt() : FunctionPass(ID) {
- initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCOpt::ID = 0; // Handed to FunctionPass(ID) by the ObjCARCOpt constructor.
-INITIALIZE_PASS_BEGIN(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
-INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) // Mirrors addRequired<ObjCARCAliasAnalysis>() in getAnalysisUsage.
-INITIALIZE_PASS_END(ObjCARCOpt,
- "objc-arc", "ObjC ARC optimization", false, false)
-
-/// createObjCARCOptPass - Factory entry point: hands back a freshly
-/// allocated ObjCARCOpt instance.
-Pass *llvm::createObjCARCOptPass() {
-  Pass *NewPass = new ObjCARCOpt();
-  return NewPass;
-}
-
-void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { // Declares the required analyses and what this pass preserves.
- AU.addRequired<ObjCARCAliasAnalysis>();
- AU.addRequired<AliasAnalysis>();
- // ARC optimization doesn't currently split critical edges, so the CFG is reported as preserved.
- AU.setPreservesCFG();
-}
-
-/// IsRetainBlockOptimizable - Decide whether the given objc_retainBlock call
-/// is one the optimizer is allowed to reason about.
-bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
-  // Both of the following must hold for the copy to be unnecessary:
-  //  * The call carries the copy-on-escape metadata tag. Without it we have
-  //    to assume this might be an objc_retainBlock inserted to convert a
-  //    block pointer to an id, in which case it really is needed.
-  //  * The pointer does not "escape" (being used in a call does not count);
-  //    an escaping block may need the copy.
-  const bool HasCopyOnEscapeTag = Inst->getMetadata(CopyOnEscapeMDKind) != 0;
-  return HasCopyOnEscapeTag && !DoesObjCBlockEscape(Inst);
-}
-
-/// getRetainRVCallee - Return the declaration of
-/// objc_retainAutoreleasedReturnValue in M, creating it on the first request
-/// and reusing the stored pointer afterwards.
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (RetainRVCallee)
-    return RetainRVCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  // Declared as: i8* objc_retainAutoreleasedReturnValue(i8*)
-  Type *BytePtrTy = PointerType::getUnqual(Type::getInt8Ty(Ctx));
-  Type *ArgTys[] = { BytePtrTy };
-  FunctionType *FnTy =
-    FunctionType::get(BytePtrTy, ArgTys, /*isVarArg=*/false);
-  // The declaration is marked nounwind.
-  AttributeSet NoUnwindAttrs =
-    AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
-                           Attribute::get(Ctx, Attribute::NoUnwind));
-  RetainRVCallee =
-    M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FnTy,
-                           NoUnwindAttrs);
-  return RetainRVCallee;
-}
-
-/// getAutoreleaseRVCallee - Return the declaration of
-/// objc_autoreleaseReturnValue in M, creating it on the first request and
-/// reusing the stored pointer afterwards.
-Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (AutoreleaseRVCallee)
-    return AutoreleaseRVCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  // Declared as: i8* objc_autoreleaseReturnValue(i8*)
-  Type *BytePtrTy = PointerType::getUnqual(Type::getInt8Ty(Ctx));
-  Type *ArgTys[] = { BytePtrTy };
-  FunctionType *FnTy =
-    FunctionType::get(BytePtrTy, ArgTys, /*isVarArg=*/false);
-  // The declaration is marked nounwind.
-  AttributeSet NoUnwindAttrs =
-    AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
-                           Attribute::get(Ctx, Attribute::NoUnwind));
-  AutoreleaseRVCallee =
-    M->getOrInsertFunction("objc_autoreleaseReturnValue", FnTy,
-                           NoUnwindAttrs);
-  return AutoreleaseRVCallee;
-}
-
-/// getReleaseCallee - Return the declaration of objc_release in M, creating
-/// it on the first request and reusing the stored pointer afterwards.
-Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (ReleaseCallee)
-    return ReleaseCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  Type *ArgTys[] = { PointerType::getUnqual(Type::getInt8Ty(Ctx)) };
-  // The declaration is marked nounwind.
-  AttributeSet NoUnwindAttrs =
-    AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
-                           Attribute::get(Ctx, Attribute::NoUnwind));
-  // Declared as: void objc_release(i8*)
-  FunctionType *FnTy =
-    FunctionType::get(Type::getVoidTy(Ctx), ArgTys, /*isVarArg=*/false);
-  ReleaseCallee = M->getOrInsertFunction("objc_release", FnTy, NoUnwindAttrs);
-  return ReleaseCallee;
-}
-
-/// getRetainCallee - Return the declaration of objc_retain in M, creating it
-/// on the first request and reusing the stored pointer afterwards.
-Constant *ObjCARCOpt::getRetainCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (RetainCallee)
-    return RetainCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  Type *ArgTys[] = { PointerType::getUnqual(Type::getInt8Ty(Ctx)) };
-  // The declaration is marked nounwind.
-  AttributeSet NoUnwindAttrs =
-    AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
-                           Attribute::get(Ctx, Attribute::NoUnwind));
-  // Declared as: i8* objc_retain(i8*) -- the result type is the parameter
-  // type.
-  FunctionType *FnTy =
-    FunctionType::get(ArgTys[0], ArgTys, /*isVarArg=*/false);
-  RetainCallee = M->getOrInsertFunction("objc_retain", FnTy, NoUnwindAttrs);
-  return RetainCallee;
-}
-
-/// getRetainBlockCallee - Return the declaration of objc_retainBlock in M,
-/// creating it on the first request and reusing the stored pointer
-/// afterwards.
-Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (RetainBlockCallee)
-    return RetainBlockCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  Type *ArgTys[] = { PointerType::getUnqual(Type::getInt8Ty(Ctx)) };
-  // Declared as: i8* objc_retainBlock(i8*)
-  FunctionType *FnTy =
-    FunctionType::get(ArgTys[0], ArgTys, /*isVarArg=*/false);
-  // objc_retainBlock is not nounwind because it calls user copy constructors
-  // which could theoretically throw, hence the empty attribute set.
-  RetainBlockCallee =
-    M->getOrInsertFunction("objc_retainBlock", FnTy, AttributeSet());
-  return RetainBlockCallee;
-}
-
-/// getAutoreleaseCallee - Return the declaration of objc_autorelease in M,
-/// creating it on the first request and reusing the stored pointer
-/// afterwards.
-Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
-  // Declared by an earlier call; nothing more to do.
-  if (AutoreleaseCallee)
-    return AutoreleaseCallee;
-
-  LLVMContext &Ctx = M->getContext();
-  Type *ArgTys[] = { PointerType::getUnqual(Type::getInt8Ty(Ctx)) };
-  // The declaration is marked nounwind.
-  AttributeSet NoUnwindAttrs =
-    AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
-                           Attribute::get(Ctx, Attribute::NoUnwind));
-  // Declared as: i8* objc_autorelease(i8*)
-  FunctionType *FnTy =
-    FunctionType::get(ArgTys[0], ArgTys, /*isVarArg=*/false);
-  AutoreleaseCallee =
-    M->getOrInsertFunction("objc_autorelease", FnTy, NoUnwindAttrs);
-  return AutoreleaseCallee;
-}
-
-/// IsPotentialUse - Test whether the given value is possibly a
-/// reference-counted pointer, including tests which utilize AliasAnalysis.
-static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) {
-  // The rudimentary check comes first; after that, objects living in
-  // constant memory are not reference-counted.
-  if (!IsPotentialUse(Op) || AA.pointsToConstantMemory(Op))
-    return false;
-
-  // A pointer loaded out of constant memory does not point to a
-  // reference-counted object either. Anything else: assume the worst.
-  const LoadInst *Load = dyn_cast<LoadInst>(Op);
-  return !(Load && AA.pointsToConstantMemory(Load->getPointerOperand()));
-}
-
-/// CanAlterRefCount - Test whether the given instruction can result in a
-/// reference count modification (positive or negative) for the pointer's
-/// object.
-static bool
-CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
-                 ProvenanceAnalysis &PA, InstructionClass Class) {
-  // Autoreleases and plain users never directly modify a reference count.
-  if (Class == IC_Autorelease || Class == IC_AutoreleaseRV ||
-      Class == IC_User)
-    return false;
-
-  ImmutableCallSite Call = static_cast<const Value *>(Inst);
-  assert(Call && "Only calls can alter reference counts!");
-
-  // Ask AliasAnalysis what the call may do to memory.
-  AliasAnalysis::ModRefBehavior Behavior =
-    PA.getAA()->getModRefBehavior(Call);
-
-  // A call that only reads memory cannot change a count.
-  if (AliasAnalysis::onlyReadsMemory(Behavior))
-    return false;
-
-  // Without a guarantee that only argument pointees are touched, assume the
-  // worst.
-  if (!AliasAnalysis::onlyAccessesArgPointees(Behavior))
-    return true;
-
-  // Only argument pointees are accessed: the count can change only if one of
-  // the arguments is related to Ptr.
-  ImmutableCallSite::arg_iterator AI = Call.arg_begin();
-  ImmutableCallSite::arg_iterator AE = Call.arg_end();
-  while (AI != AE) {
-    const Value *Operand = *AI;
-    if (IsPotentialUse(Operand, *PA.getAA()) && PA.related(Ptr, Operand))
-      return true;
-    ++AI;
-  }
-  return false;
-}
-
-/// CanUse - Test whether the given instruction can "use" the given pointer's
-/// object in a way that requires the reference count to be positive.
-static bool
-CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
-       InstructionClass Class) {
-  // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc
-  // pointers.
-  if (Class == IC_Call)
-    return false;
-
-  // Several instruction kinds take pointer operands that are not "uses";
-  // each is handled on its own below. The kinds are mutually exclusive.
-
-  // Comparisons: comparing a pointer with null, or any other constant, isn't
-  // really a use, because we don't care what the pointer points to, or about
-  // the values of any other dynamic reference-counted pointers. Otherwise
-  // the comparison is treated by the generic operand scan at the end.
-  if (const ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) {
-    if (!IsPotentialUse(Cmp->getOperand(1), *PA.getAA()))
-      return false;
-  }
-
-  // Calls: only the arguments matter, not the callee operand.
-  if (ImmutableCallSite Call = static_cast<const Value *>(Inst)) {
-    ImmutableCallSite::arg_iterator AI = Call.arg_begin();
-    ImmutableCallSite::arg_iterator AE = Call.arg_end();
-    while (AI != AE) {
-      const Value *Operand = *AI;
-      if (IsPotentialUse(Operand, *PA.getAA()) && PA.related(Ptr, Operand))
-        return true;
-      ++AI;
-    }
-    return false;
-  }
-
-  // Stores: the stored value is irrelevant, only the store address counts.
-  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
-    const Value *Addr = GetUnderlyingObjCPtr(Store->getPointerOperand());
-    // If we can't tell what the underlying object was, assume there is a
-    // dependence.
-    return IsPotentialUse(Addr, *PA.getAA()) && PA.related(Addr, Ptr);
-  }
-
-  // Everything else: look for a match among all of the operands.
-  User::const_op_iterator OpI = Inst->op_begin();
-  User::const_op_iterator OpE = Inst->op_end();
-  while (OpI != OpE) {
-    const Value *Operand = *OpI;
-    if (IsPotentialUse(Operand, *PA.getAA()) && PA.related(Ptr, Operand))
-      return true;
-    ++OpI;
-  }
-  return false;
-}
-
-/// CanInterruptRV - Test whether the given instruction can autorelease
-/// any pointer or cause an autoreleasepool pop.
-static bool
-CanInterruptRV(InstructionClass Class) {
-  // Exactly these classes are considered interrupting; every other class is
-  // not.
-  return Class == IC_AutoreleasepoolPop ||
-         Class == IC_CallOrUser ||
-         Class == IC_Call ||
-         Class == IC_Autorelease ||
-         Class == IC_AutoreleaseRV ||
-         Class == IC_FusedRetainAutorelease ||
-         Class == IC_FusedRetainAutoreleaseRV;
-}
-
-namespace {
- /// DependenceKind - There are several kinds of dependence-like concepts in
- /// use here. Depends() switches on this value to pick the per-instruction test.
- enum DependenceKind {
- NeedsPositiveRetainCount, ///< Answered via CanUse().
- AutoreleasePoolBoundary, ///< Matches autorelease pool push and pop.
- CanChangeRetainCount, ///< Pool pops, or whatever CanAlterRefCount() reports.
- RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
- RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
- RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
- };
-}
-
-/// Depends - Test if there can be dependencies on Inst through Arg. This
-/// function only tests dependencies relevant for removing pairs of calls.
-static bool
-Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
-        ProvenanceAnalysis &PA) {
-  // Reaching the definition of Arg always counts as a dependence.
-  if (Inst == Arg)
-    return true;
-
-  switch (Flavor) {
-  case NeedsPositiveRetainCount: {
-    const InstructionClass Kind = GetInstructionClass(Inst);
-    // Pool push/pop and unclassified instructions never need the count to
-    // be positive; for the rest, ask CanUse.
-    if (Kind == IC_AutoreleasepoolPop || Kind == IC_AutoreleasepoolPush ||
-        Kind == IC_None)
-      return false;
-    return CanUse(Inst, Arg, PA, Kind);
-  }
-
-  case AutoreleasePoolBoundary: {
-    const InstructionClass Kind = GetInstructionClass(Inst);
-    // Only push and pop mark the begin and end of an autorelease pool
-    // scope; nothing else does this.
-    return Kind == IC_AutoreleasepoolPop || Kind == IC_AutoreleasepoolPush;
-  }
-
-  case CanChangeRetainCount: {
-    const InstructionClass Kind = GetInstructionClass(Inst);
-    // Conservatively assume a pool pop can decrement any count.
-    if (Kind == IC_AutoreleasepoolPop)
-      return true;
-    if (Kind == IC_AutoreleasepoolPush || Kind == IC_None)
-      return false;
-    return CanAlterRefCount(Inst, Arg, PA, Kind);
-  }
-
-  case RetainAutoreleaseDep: {
-    const InstructionClass Kind = GetBasicInstructionClass(Inst);
-    // Don't merge an objc_autorelease with an objc_retain inside a
-    // different autoreleasepool scope.
-    if (Kind == IC_AutoreleasepoolPop || Kind == IC_AutoreleasepoolPush)
-      return true;
-    // Check for a retain of the same pointer for merging.
-    if (Kind == IC_Retain || Kind == IC_RetainRV)
-      return GetObjCArg(Inst) == Arg;
-    // Nothing else matters for objc_retainAutorelease formation.
-    return false;
-  }
-
-  case RetainAutoreleaseRVDep: {
-    const InstructionClass Kind = GetBasicInstructionClass(Inst);
-    // Check for a retain of the same pointer for merging.
-    if (Kind == IC_Retain || Kind == IC_RetainRV)
-      return GetObjCArg(Inst) == Arg;
-    // Anything that can autorelease interrupts
-    // retainAutoreleaseReturnValue formation.
-    return CanInterruptRV(Kind);
-  }
-
-  case RetainRVDep:
-    return CanInterruptRV(GetBasicInstructionClass(Inst));
-  }
-
-  llvm_unreachable("Invalid dependence flavor");
-}
-
-/// FindDependencies - Walk up the CFG from StartInst (which is in StartBB) and
-/// find local and non-local dependencies on Arg; results go in DependingInstructions.
-/// TODO: Cache results?
-static void
-FindDependencies(DependenceKind Flavor,
- const Value *Arg,
- BasicBlock *StartBB, Instruction *StartInst,
- SmallPtrSet<Instruction *, 4> &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4> &Visited,
- ProvenanceAnalysis &PA) {
- BasicBlock::iterator StartPos = StartInst;
-
- SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist; // (block, position to scan backwards from)
- Worklist.push_back(std::make_pair(StartBB, StartPos));
- do {
- std::pair<BasicBlock *, BasicBlock::iterator> Pair =
- Worklist.pop_back_val();
- BasicBlock *LocalStartBB = Pair.first;
- BasicBlock::iterator LocalStartPos = Pair.second;
- BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
- for (;;) {
- if (LocalStartPos == StartBBBegin) { // Ran off the top of this block.
- pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
- if (PI == PE)
- // If we've reached the function entry, produce a null dependence.
- DependingInstructions.insert(0);
- else
- // Add the predecessors to the worklist, each at most once (guarded by Visited).
- do {
- BasicBlock *PredBB = *PI;
- if (Visited.insert(PredBB))
- Worklist.push_back(std::make_pair(PredBB, PredBB->end())); // Scan PredBB from its end.
- } while (++PI != PE);
- break;
- }
-
- Instruction *Inst = --LocalStartPos; // Step back one instruction.
- if (Depends(Flavor, Inst, Arg, PA)) {
- DependingInstructions.insert(Inst);
- break; // The first dependence found ends the scan along this path.
- }
- }
- } while (!Worklist.empty());
-
- // Determine whether the original StartBB post-dominates all of the blocks we
- // visited. If not, insert a sentinel indicating that most optimizations are
- // not safe.
- for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
- E = Visited.end(); I != E; ++I) {
- const BasicBlock *BB = *I;
- if (BB == StartBB)
- continue;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- const BasicBlock *Succ = *SI;
- if (Succ != StartBB && !Visited.count(Succ)) { // A visited block can branch somewhere we never visited.
- DependingInstructions.insert(reinterpret_cast<Instruction *>(-1)); // -1 is the sentinel value.
- return;
- }
- }
- }
-}
-
-/// isNullOrUndef - Test whether V is a constant null pointer or an undef
-/// value.
-static bool isNullOrUndef(const Value *V) {
-  if (isa<ConstantPointerNull>(V))
-    return true;
-  return isa<UndefValue>(V);
-}
-
-/// isNoopInstruction - Test whether I is a bitcast, or a getelementptr whose
-/// indices are all zero.
-static bool isNoopInstruction(const Instruction *I) {
-  if (isa<BitCastInst>(I))
-    return true;
-  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
-  return GEP && GEP->hasAllZeroIndices();
-}
-
-/// OptimizeRetainCall - Turn objc_retain into
-/// objc_retainAutoreleasedReturnValue if the operand is a return value.
-///
-/// The rewrite only happens when the retained value is produced by a call
-/// located in the same basic block as \p Retain, with nothing but no-op
-/// instructions (as defined by isNoopInstruction) between that call and
-/// \p Retain. On success Changed is set and NumPeeps is incremented;
-/// otherwise the function returns without modifying anything.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
-  ImmutableCallSite CS(GetObjCArg(Retain));
-  const Instruction *Call = CS.getInstruction();
-  // The operand is not a call at all.
-  if (!Call) return;
-  // The call lives in a different block, so it cannot be adjacent.
-  if (Call->getParent() != Retain->getParent()) return;
-
-  // Check that the call is next to the retain, skipping over no-ops.
-  BasicBlock::const_iterator I = Call;
-  ++I;
-  while (isNoopInstruction(I)) ++I;
-  if (&*I != Retain)
-    return;
-
-  // Turn it to an objc_retainAutoreleasedReturnValue.
-  Changed = true;
-  ++NumPeeps;
-
-  // The message names the transformation actually performed here:
-  // objc_retain becomes the RV form because the operand IS a return value.
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
-                  "objc_retain => "
-                  "objc_retainAutoreleasedReturnValue since the operand is a "
-                  "return value.\n"
-                  " Old: "
-               << *Retain << "\n");
-
-  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-
-  DEBUG(dbgs() << " New: "
-               << *Retain << "\n");
-}
-
-/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
-/// objc_retain if the operand is not a return value. Or, if it can be paired
-/// with an objc_autoreleaseReturnValue, delete the pair and return true.
-bool
-ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
- // Check for the argument being from an immediately preceding call or invoke.
- const Value *Arg = GetObjCArg(RetainRV);
- ImmutableCallSite CS(Arg);
- if (const Instruction *Call = CS.getInstruction()) {
- if (Call->getParent() == RetainRV->getParent()) {
- BasicBlock::const_iterator I = Call;
- ++I;
- while (isNoopInstruction(I)) ++I; // Skip bitcasts and all-zero GEPs.
- if (&*I == RetainRV)
- return false; // Directly follows the call: leave the RV form untouched.
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- BasicBlock *RetainRVParent = RetainRV->getParent();
- if (II->getNormalDest() == RetainRVParent) {
- BasicBlock::const_iterator I = RetainRVParent->begin();
- while (isNoopInstruction(I)) ++I;
- if (&*I == RetainRV)
- return false; // First real instruction of the invoke's normal destination: leave untouched.
- }
- }
- }
-
- // Check for being preceded by an objc_autoreleaseReturnValue on the same
- // pointer. In this case, we can delete the pair.
- BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
- if (I != Begin) {
- do --I; while (I != Begin && isNoopInstruction(I)); // Back up over no-ops, stopping at the block start.
- if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
- GetObjCArg(I) == Arg) {
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
- << " Erasing " << *RetainRV
- << "\n");
-
- EraseInstruction(I);
- EraseInstruction(RetainRV);
- return true; // RetainRV no longer exists; callers must not touch it.
- }
- }
-
- // Turn it to a plain objc_retain.
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
- "objc_retainAutoreleasedReturnValue => "
- "objc_retain since the operand is not a return value.\n"
- " Old: "
- << *RetainRV << "\n");
-
- cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
-
- DEBUG(dbgs() << " New: "
- << *RetainRV << "\n");
-
- return false; // Rewritten in place, not erased.
-}
-
-/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
-/// objc_autorelease if the result is not used as a return value.
-void
-ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
-  // Walk the users of the autoreleased pointer, looking through bitcasts.
-  // Finding a return of the value, or an objc_retainAutoreleasedReturnValue
-  // of it, means the RV form has to stay.
-  SmallVector<const Value *, 2> Pending;
-  Pending.push_back(GetObjCArg(AutoreleaseRV));
-  while (!Pending.empty()) {
-    const Value *Current = Pending.pop_back_val();
-    Value::const_use_iterator UseI = Current->use_begin();
-    Value::const_use_iterator UseE = Current->use_end();
-    for (; UseI != UseE; ++UseI) {
-      const User *Consumer = *UseI;
-      if (isa<ReturnInst>(Consumer))
-        return;
-      if (GetBasicInstructionClass(Consumer) == IC_RetainRV)
-        return;
-      // Users of a bitcast of the pointer count as well.
-      if (isa<BitCastInst>(Consumer))
-        Pending.push_back(Consumer);
-    }
-  }
-
-  // No such user exists: downgrade to a plain objc_autorelease.
-  Changed = true;
-  ++NumPeeps;
-
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
-                  "objc_autoreleaseReturnValue => "
-                  "objc_autorelease since its operand is not used as a return "
-                  "value.\n"
-                  " Old: "
-               << *AutoreleaseRV << "\n");
-
-  CallInst *AutoreleaseCall = cast<CallInst>(AutoreleaseRV);
-  AutoreleaseCall->setCalledFunction(getAutoreleaseCallee(F.getParent()));
-
-  DEBUG(dbgs() << " New: "
-               << *AutoreleaseRV << "\n");
-}
-
-/// OptimizeIndividualCalls - Visit each call, one at a time, and make
-/// simplifications without doing any additional analysis.
-void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
- // Reset all the flags in preparation for recomputing them.
- UsedInThisFunction = 0;
-
- // Visit all objc_* calls in F.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++; // Step I past Inst up front; Inst may be erased below.
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: " <<
- *Inst << "\n");
-
- InstructionClass Class = GetBasicInstructionClass(Inst);
-
- switch (Class) {
- default: break;
-
- // Delete no-op casts. These function calls have special semantics, but
- // the semantics are entirely implemented via lowering in the front-end,
- // so by the time they reach the optimizer, they are just no-op calls
- // which return their argument.
- //
- // There are gray areas here, as the ability to cast reference-counted
- // pointers to raw void* and back allows code to break ARC assumptions,
- // however these are currently considered to be unimportant.
- case IC_NoopCast:
- Changed = true;
- ++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
- " " << *Inst << "\n");
- EraseInstruction(Inst);
- continue;
-
- // If the pointer-to-weak-pointer is null, it's undefined behavior.
- case IC_StoreWeak:
- case IC_LoadWeak:
- case IC_LoadWeakRetained:
- case IC_InitWeak:
- case IC_DestroyWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI); // A store of undef to a null pointer takes the call's place; the call's result becomes undef.
- llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
- CI->replaceAllUsesWith(NewValue);
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case IC_CopyWeak: // Two-pointer variants: either operand being null/undef triggers the same rewrite.
- case IC_MoveWeak: {
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(0)) ||
- isNullOrUndef(CI->getArgOperand(1))) {
- Changed = true;
- Type *Ty = CI->getArgOperand(0)->getType();
- new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
- Constant::getNullValue(Ty),
- CI);
-
- llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
-
- CI->replaceAllUsesWith(NewValue);
- CI->eraseFromParent();
- continue;
- }
- break;
- }
- case IC_Retain:
- OptimizeRetainCall(F, Inst);
- break;
- case IC_RetainRV:
- if (OptimizeRetainRVCall(F, Inst)) // True means Inst was erased.
- continue;
- break;
- case IC_AutoreleaseRV:
- OptimizeAutoreleaseRVCall(F, Inst);
- break;
- }
-
- // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
- if (IsAutorelease(Class) && Inst->use_empty()) {
- CallInst *Call = cast<CallInst>(Inst);
- const Value *Arg = Call->getArgOperand(0);
- Arg = FindSingleUseIdentifiedObject(Arg);
- if (Arg) {
- Changed = true;
- ++NumAutoreleases;
-
- // Create the declaration lazily.
- LLVMContext &C = Inst->getContext();
- CallInst *NewCall =
- CallInst::Create(getReleaseCallee(F.getParent()),
- Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>())); // Tag the new release with empty clang.imprecise_release metadata.
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
- "objc_autorelease(x) with objc_release(x) since x is "
- "otherwise unused.\n"
- " Old: " << *Call <<
- "\n New: " <<
- *NewCall << "\n");
-
- EraseInstruction(Call);
- Inst = NewCall; // The rest of this iteration operates on the release.
- Class = IC_Release;
- }
- }
-
- // For functions which can never be passed stack arguments, add
- // a tail keyword.
- if (IsAlwaysTail(Class)) {
- Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
- " to function since it can never be passed stack args: " << *Inst <<
- "\n");
- cast<CallInst>(Inst)->setTailCall();
- }
-
- // Set nounwind as needed.
- if (IsNoThrow(Class)) {
- Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
- " class. Setting nounwind on: " << *Inst << "\n");
- cast<CallInst>(Inst)->setDoesNotThrow();
- }
-
- if (!IsNoopOnNull(Class)) {
- UsedInThisFunction |= 1 << Class; // Record the class and skip the null-argument handling below.
- continue;
- }
-
- const Value *Arg = GetObjCArg(Inst);
-
- // ARC calls with null are no-ops. Delete them.
- if (isNullOrUndef(Arg)) {
- Changed = true;
- ++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
- " null are no-ops. Erasing: " << *Inst << "\n");
- EraseInstruction(Inst);
- continue;
- }
-
- // Keep track of which of retain, release, autorelease, and retain_block
- // are actually present in this function.
- UsedInThisFunction |= 1 << Class;
-
- // If Arg is a PHI, and one or more incoming values to the
- // PHI are null, and the call is control-equivalent to the PHI, and there
- // are no relevant side effects between the PHI and the call, the call
- // could be pushed up to just those paths with non-null incoming values.
- // For now, don't bother splitting critical edges for this.
- SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; // (call, its stripped argument) pairs still to examine.
- Worklist.push_back(std::make_pair(Inst, Arg));
- do {
- std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
- Inst = Pair.first;
- Arg = Pair.second;
-
- const PHINode *PN = dyn_cast<PHINode>(Arg);
- if (!PN) continue;
-
- // Determine if the PHI has any null operands, or any incoming
- // critical edges.
- bool HasNull = false;
- bool HasCriticalEdges = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (isNullOrUndef(Incoming))
- HasNull = true;
- else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
- .getNumSuccessors() != 1) { // A non-null value arriving from a block with several successors.
- HasCriticalEdges = true;
- break;
- }
- }
- // If we have null operands and no critical edges, optimize.
- if (!HasCriticalEdges && HasNull) {
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
-
- // Check that there is nothing that cares about the reference
- // count between the call and the phi.
- switch (Class) {
- case IC_Retain:
- case IC_RetainBlock:
- // These can always be moved up.
- break;
- case IC_Release:
- // These can't be moved across things that care about the retain
- // count.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case IC_Autorelease:
- // These can't be moved across autorelease pool scope boundaries.
- FindDependencies(AutoreleasePoolBoundary, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
- break;
- case IC_RetainRV:
- case IC_AutoreleaseRV:
- // Don't move these; the RV optimization depends on the autoreleaseRV
- // being tail called, and the retainRV being immediately after a call
- // (which might still happen if we get lucky with codegen layout, but
- // it's not worth taking the chance).
- continue; // Jumps to the do/while condition, i.e. on to the next worklist entry.
- default:
- llvm_unreachable("Invalid dependence flavor"); // NOTE(review): this switch is over InstructionClass, not a dependence flavor; the message is misleading.
- }
-
- if (DependingInstructions.size() == 1 &&
- *DependingInstructions.begin() == PN) { // The PHI itself is the only dependence found.
- Changed = true;
- ++NumPartialNoops;
- // Clone the call into each predecessor that has a non-null value.
- CallInst *CInst = cast<CallInst>(Inst);
- Type *ParamTy = CInst->getArgOperand(0)->getType();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming =
- StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
- if (!isNullOrUndef(Incoming)) {
- CallInst *Clone = cast<CallInst>(CInst->clone());
- Value *Op = PN->getIncomingValue(i);
- Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); // Insert ahead of the predecessor's last instruction.
- if (Op->getType() != ParamTy)
- Op = new BitCastInst(Op, ParamTy, "", InsertPos);
- Clone->setArgOperand(0, Op);
- Clone->insertBefore(InsertPos);
- Worklist.push_back(std::make_pair(Clone, Incoming)); // The clone's argument may itself be a PHI.
- }
- }
- // Erase the original call.
- EraseInstruction(CInst);
- continue;
- }
- }
- } while (!Worklist.empty());
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished Queue.\n\n");
-
- }
-}
-
-/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
-/// control flow, or other CFG structures where moving code across the edge
-/// would result in it being executed more.
-void
-ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- BBState &MyStates) const {
- // If any top-down local-use or possible-dec has a succ which is earlier in
- // the sequence, forget it. Only the S_Use and S_CanRelease states are examined.
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I)
- switch (I->second.GetSeq()) {
- default: break;
- case S_Use: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE; // Drops the last successor; presumably the unwind destination -- TODO confirm.
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end()); // Every successor is expected to have a BBState entry.
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break; // Leaves the switch only; the remaining successors are still examined.
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: { // Same walk as the S_Use case above, with S_CanRelease as the matching successor state.
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE; // Drops the last successor; presumably the unwind destination -- TODO confirm.
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end()); // Every successor is expected to have a BBState entry.
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break; // Leaves the switch only; the remaining successors are still examined.
- }
- continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use: // Unlike the S_Use case above, a successor in S_Use counts as a mismatch here.
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
- }
-}
-
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
- // If we see two releases in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
- NestingDetected = true;
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
- S.RRI.ReleaseMetadata = ReleaseMetadata;
- S.RRI.KnownSafe = S.IsKnownIncremented();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- S.RRI.Calls.insert(Inst);
-
- S.SetKnownPositiveRefCount();
- break;
- }
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.SetKnownPositiveRefCount();
-
- switch (S.GetSeq()) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- Retains[Inst] = S.RRI;
- }
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- return NestingDetected;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearBottomUpPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_Release:
- case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
- else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
- S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
- else
- S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
- bool NestingDetected = false;
- BBState &MyStates = BBStates[BB];
-
- // Merge the states from each successor to compute the initial state
- // for the current block.
- BBState::edge_iterator SI(MyStates.succ_begin()),
- SE(MyStates.succ_end());
- if (SI != SE) {
- const BasicBlock *Succ = *SI;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.InitFromSucc(I->second);
- ++SI;
- for (; SI != SE; ++SI) {
- Succ = *SI;
- I = BBStates.find(Succ);
- assert(I != BBStates.end());
- MyStates.MergeSucc(I->second);
- }
- }
-
- // Visit all the instructions, bottom-up.
- for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
- Instruction *Inst = llvm::prior(I);
-
- // Invoke instructions are visited as part of their successors (below).
- if (isa<InvokeInst>(Inst))
- continue;
-
- NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
- }
-
- // If there's a predecessor with an invoke, visit the invoke as if it were
- // part of this block, since we can't insert code after an invoke in its own
- // block, and we don't want to split critical edges.
- for (BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end()); PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
- NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
- DenseMap<Value *, RRInfo> &Releases,
- BBState &MyStates) {
- bool NestingDetected = false;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
-
- switch (Class) {
- case IC_RetainBlock:
- // An objc_retainBlock call with just a use may need to be kept,
- // because it may be copying a block from the stack to the heap.
- if (!IsRetainBlockOptimizable(Inst))
- break;
- // FALLTHROUGH
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.ResetSequenceProgress(S_Retain);
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- S.RRI.KnownSafe = S.IsKnownIncremented();
- S.RRI.Calls.insert(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
- // A retain can be a potential use; procede to the generic checking
- // code below.
- break;
- }
- case IC_Release: {
- Arg = GetObjCArg(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearRefCount();
-
- switch (S.GetSeq()) {
- case S_Retain:
- case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- break;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearTopDownPointers();
- return NestingDetected;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- return NestingDetected;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.ClearRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- return NestingDetected;
-}
-
-bool
-ObjCARCOpt::VisitTopDown(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- DenseMap<Value *, RRInfo> &Releases) {
- bool NestingDetected = false;
- BBState &MyStates = BBStates[BB];
-
- // Merge the states from each predecessor to compute the initial state
- // for the current block.
- BBState::edge_iterator PI(MyStates.pred_begin()),
- PE(MyStates.pred_end());
- if (PI != PE) {
- const BasicBlock *Pred = *PI;
- DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
- MyStates.InitFromPred(I->second);
- ++PI;
- for (; PI != PE; ++PI) {
- Pred = *PI;
- I = BBStates.find(Pred);
- assert(I != BBStates.end());
- MyStates.MergePred(I->second);
- }
- }
-
- // Visit all the instructions, top-down.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- Instruction *Inst = I;
- NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
- }
-
- CheckForCFGHazards(BB, BBStates, MyStates);
- return NestingDetected;
-}
-
-static void
-ComputePostOrders(Function &F,
- SmallVectorImpl<BasicBlock *> &PostOrder,
- SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
- unsigned NoObjCARCExceptionsMDKind,
- DenseMap<const BasicBlock *, BBState> &BBStates) {
- /// Visited - The visited set, for doing DFS walks.
- SmallPtrSet<BasicBlock *, 16> Visited;
-
- // Do DFS, computing the PostOrder.
- SmallPtrSet<BasicBlock *, 16> OnStack;
- SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
-
- // Functions always have exactly one entry block, and we don't have
- // any other block that we treat like an entry block.
- BasicBlock *EntryBB = &F.getEntryBlock();
- BBState &MyStates = BBStates[EntryBB];
- MyStates.SetAsEntry();
- TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back());
- SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
- Visited.insert(EntryBB);
- OnStack.insert(EntryBB);
- do {
- dfs_next_succ:
- BasicBlock *CurrBB = SuccStack.back().first;
- TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back());
- succ_iterator SE(TI, false);
-
- // If the terminator is an invoke marked with the
- // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
- // ignored, for ARC purposes.
- if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
- --SE;
-
- while (SuccStack.back().second != SE) {
- BasicBlock *SuccBB = *SuccStack.back().second++;
- if (Visited.insert(SuccBB)) {
- TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back());
- SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI)));
- BBStates[CurrBB].addSucc(SuccBB);
- BBState &SuccStates = BBStates[SuccBB];
- SuccStates.addPred(CurrBB);
- OnStack.insert(SuccBB);
- goto dfs_next_succ;
- }
-
- if (!OnStack.count(SuccBB)) {
- BBStates[CurrBB].addSucc(SuccBB);
- BBStates[SuccBB].addPred(CurrBB);
- }
- }
- OnStack.erase(CurrBB);
- PostOrder.push_back(CurrBB);
- SuccStack.pop_back();
- } while (!SuccStack.empty());
-
- Visited.clear();
-
- // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
- // Functions may have many exits, and there also blocks which we treat
- // as exits due to ignored edges.
- SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *ExitBB = I;
- BBState &MyStates = BBStates[ExitBB];
- if (!MyStates.isExit())
- continue;
-
- MyStates.SetAsExit();
-
- PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
- Visited.insert(ExitBB);
- while (!PredStack.empty()) {
- reverse_dfs_next_succ:
- BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
- while (PredStack.back().second != PE) {
- BasicBlock *BB = *PredStack.back().second++;
- if (Visited.insert(BB)) {
- PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
- goto reverse_dfs_next_succ;
- }
- }
- ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
- }
- }
-}
-
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
-
- // Use reverse-postorder traversals, because we magically know that loops
- // will be well behaved, i.e. they won't repeatedly call retain on a single
- // pointer without doing a release. We can't use the ReversePostOrderTraversal
- // class here because we want the reverse-CFG postorder to consider each
- // function exit point, and we want to ignore selected cycle edges.
- SmallVector<BasicBlock *, 16> PostOrder;
- SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
- ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
- BBStates);
-
- // Use reverse-postorder on the reverse CFG for bottom-up.
- bool BottomUpNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
- I != E; ++I)
- BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
-
- // Use reverse-postorder for top-down.
- bool TopDownNestingDetected = false;
- for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- PostOrder.rbegin(), E = PostOrder.rend();
- I != E; ++I)
- TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
-
- return TopDownNestingDetected && BottomUpNestingDetected;
-}
-
-/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
-void ObjCARCOpt::MoveCalls(Value *Arg,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M) {
- Type *ArgTy = Arg->getType();
- Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
-
- // Insert the new retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = ReleasesToMove.ReverseInsertPts.begin(),
- PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call =
- CallInst::Create(RetainsToMove.IsRetainBlock ?
- getRetainBlockCallee(M) : getRetainCallee(M),
- MyArg, "", InsertPt);
- Call->setDoesNotThrow();
- if (RetainsToMove.IsRetainBlock)
- Call->setMetadata(CopyOnEscapeMDKind,
- MDNode::get(M->getContext(), ArrayRef<Value *>()));
- else
- Call->setTailCall();
- }
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- PI = RetainsToMove.ReverseInsertPts.begin(),
- PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *InsertPt = *PI;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
- // Attach a clang.imprecise_release metadata tag, if appropriate.
- if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
- Call->setDoesNotThrow();
- if (ReleasesToMove.IsTailCallRelease)
- Call->setTailCall();
- }
-
- // Delete the original retain and release calls.
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = RetainsToMove.Calls.begin(),
- AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRetain = *AI;
- Retains.blot(OrigRetain);
- DeadInsts.push_back(OrigRetain);
- }
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- AI = ReleasesToMove.Calls.begin(),
- AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
- Instruction *OrigRelease = *AI;
- Releases.erase(OrigRelease);
- DeadInsts.push_back(OrigRelease);
- }
-}
-
-/// PerformCodePlacement - Identify pairings between the retains and releases,
-/// and delete and/or move them.
-bool
-ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M) {
- bool AnyPairsCompletelyEliminated = false;
- RRInfo RetainsToMove;
- RRInfo ReleasesToMove;
- SmallVector<Instruction *, 4> NewRetains;
- SmallVector<Instruction *, 4> NewReleases;
- SmallVector<Instruction *, 8> DeadInsts;
-
- // Visit each retain.
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ++I) {
- Value *V = I->first;
- if (!V) continue; // blotted
-
- Instruction *Retain = cast<Instruction>(V);
- Value *Arg = GetObjCArg(Retain);
-
- // If the object being released is in static or stack storage, we know it's
- // not being managed by ObjC reference counting, so we can delete pairs
- // regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
-
- // A constant pointer can't be pointing to an object on the heap. It may
- // be reference-counted, but it won't be deleted.
- if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(
- StripPointerCastsAndObjCCalls(LI->getPointerOperand())))
- if (GV->isConstant())
- KnownSafe = true;
-
- // If a pair happens in a region where it is known that the reference count
- // is already incremented, we can similarly ignore possible decrements.
- bool KnownSafeTD = true, KnownSafeBU = true;
-
- // Connect the dots between the top-down-collected RetainsToMove and
- // bottom-up-collected ReleasesToMove to form sets of related calls.
- // This is an iterative process so that we connect multiple releases
- // to multiple retains if needed.
- unsigned OldDelta = 0;
- unsigned NewDelta = 0;
- unsigned OldCount = 0;
- unsigned NewCount = 0;
- bool FirstRelease = true;
- bool FirstRetain = true;
- NewRetains.push_back(Retain);
- for (;;) {
- for (SmallVectorImpl<Instruction *>::const_iterator
- NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
- Instruction *NewRetain = *NI;
- MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
- assert(It != Retains.end());
- const RRInfo &NewRetainRRI = It->second;
- KnownSafeTD &= NewRetainRRI.KnownSafe;
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewRetainRRI.Calls.begin(),
- LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewRetainRelease = *LI;
- DenseMap<Value *, RRInfo>::const_iterator Jt =
- Releases.find(NewRetainRelease);
- if (Jt == Releases.end())
- goto next_retain;
- const RRInfo &NewRetainReleaseRRI = Jt->second;
- assert(NewRetainReleaseRRI.Calls.count(NewRetain));
- if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
- OldDelta -=
- BBStates[NewRetainRelease->getParent()].GetAllPathCount();
-
- // Merge the ReleaseMetadata and IsTailCallRelease values.
- if (FirstRelease) {
- ReleasesToMove.ReleaseMetadata =
- NewRetainReleaseRRI.ReleaseMetadata;
- ReleasesToMove.IsTailCallRelease =
- NewRetainReleaseRRI.IsTailCallRelease;
- FirstRelease = false;
- } else {
- if (ReleasesToMove.ReleaseMetadata !=
- NewRetainReleaseRRI.ReleaseMetadata)
- ReleasesToMove.ReleaseMetadata = 0;
- if (ReleasesToMove.IsTailCallRelease !=
- NewRetainReleaseRRI.IsTailCallRelease)
- ReleasesToMove.IsTailCallRelease = false;
- }
-
- // Collect the optimal insertion points.
- if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
- RE = NewRetainReleaseRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (ReleasesToMove.ReverseInsertPts.insert(RIP))
- NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
- }
- NewReleases.push_back(NewRetainRelease);
- }
- }
- }
- NewRetains.clear();
- if (NewReleases.empty()) break;
-
- // Back the other way.
- for (SmallVectorImpl<Instruction *>::const_iterator
- NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
- Instruction *NewRelease = *NI;
- DenseMap<Value *, RRInfo>::const_iterator It =
- Releases.find(NewRelease);
- assert(It != Releases.end());
- const RRInfo &NewReleaseRRI = It->second;
- KnownSafeBU &= NewReleaseRRI.KnownSafe;
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- LI = NewReleaseRRI.Calls.begin(),
- LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
- Instruction *NewReleaseRetain = *LI;
- MapVector<Value *, RRInfo>::const_iterator Jt =
- Retains.find(NewReleaseRetain);
- if (Jt == Retains.end())
- goto next_retain;
- const RRInfo &NewReleaseRetainRRI = Jt->second;
- assert(NewReleaseRetainRRI.Calls.count(NewRelease));
- if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
- unsigned PathCount =
- BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
- OldDelta += PathCount;
- OldCount += PathCount;
-
- // Merge the IsRetainBlock values.
- if (FirstRetain) {
- RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
- FirstRetain = false;
- } else if (ReleasesToMove.IsRetainBlock !=
- NewReleaseRetainRRI.IsRetainBlock)
- // It's not possible to merge the sequences if one uses
- // objc_retain and the other uses objc_retainBlock.
- goto next_retain;
-
- // Collect the optimal insertion points.
- if (!KnownSafe)
- for (SmallPtrSet<Instruction *, 2>::const_iterator
- RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
- RE = NewReleaseRetainRRI.ReverseInsertPts.end();
- RI != RE; ++RI) {
- Instruction *RIP = *RI;
- if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
- PathCount = BBStates[RIP->getParent()].GetAllPathCount();
- NewDelta += PathCount;
- NewCount += PathCount;
- }
- }
- NewRetains.push_back(NewReleaseRetain);
- }
- }
- }
- NewReleases.clear();
- if (NewRetains.empty()) break;
- }
-
- // If the pointer is known incremented or nested, we can safely delete the
- // pair regardless of what's between them.
- if (KnownSafeTD || KnownSafeBU) {
- RetainsToMove.ReverseInsertPts.clear();
- ReleasesToMove.ReverseInsertPts.clear();
- NewCount = 0;
- } else {
- // Determine whether the new insertion points we computed preserve the
- // balance of retain and release calls through the program.
- // TODO: If the fully aggressive solution isn't valid, try to find a
- // less aggressive solution which is.
- if (NewDelta != 0)
- goto next_retain;
- }
-
- // Determine whether the original call points are balanced in the retain and
- // release calls through the program. If not, conservatively don't touch
- // them.
- // TODO: It's theoretically possible to do code motion in this case, as
- // long as the existing imbalances are maintained.
- if (OldDelta != 0)
- goto next_retain;
-
- // Ok, everything checks out and we're all set. Let's move some code!
- Changed = true;
- assert(OldCount != 0 && "Unreachable code?");
- AnyPairsCompletelyEliminated = NewCount == 0;
- NumRRs += OldCount - NewCount;
- MoveCalls(Arg, RetainsToMove, ReleasesToMove,
- Retains, Releases, DeadInsts, M);
-
- next_retain:
- NewReleases.clear();
- NewRetains.clear();
- RetainsToMove.clear();
- ReleasesToMove.clear();
- }
-
- // Now that we're done moving everything, we can delete the newly dead
- // instructions, as we no longer need them as insert points.
- while (!DeadInsts.empty())
- EraseInstruction(DeadInsts.pop_back_val());
-
- return AnyPairsCompletelyEliminated;
-}
-
-/// OptimizeWeakCalls - Weak pointer optimizations.
-void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
- // First, do memdep-style RLE and S2L optimizations. We can't use memdep
- // itself because it uses AliasAnalysis and we need to do provenance
- // queries instead.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
- "\n");
-
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
- continue;
-
- // Delete objc_loadWeak calls with no users.
- if (Class == IC_LoadWeak && Inst->use_empty()) {
- Inst->eraseFromParent();
- continue;
- }
-
- // TODO: For now, just look for an earlier available version of this value
- // within the same block. Theoretically, we could do memdep-style non-local
- // analysis too, but that would want caching. A better approach would be to
- // use the technique that EarlyCSE uses.
- inst_iterator Current = llvm::prior(I);
- BasicBlock *CurrentBB = Current.getBasicBlockIterator();
- for (BasicBlock::iterator B = CurrentBB->begin(),
- J = Current.getInstructionIterator();
- J != B; --J) {
- Instruction *EarlierInst = &*llvm::prior(J);
- InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
- switch (EarlierClass) {
- case IC_LoadWeak:
- case IC_LoadWeakRetained: {
- // If this is loading from the same pointer, replace this load's value
- // with that one.
- CallInst *Call = cast<CallInst>(Inst);
- CallInst *EarlierCall = cast<CallInst>(EarlierInst);
- Value *Arg = Call->getArgOperand(0);
- Value *EarlierArg = EarlierCall->getArgOperand(0);
- switch (PA.getAA()->alias(Arg, EarlierArg)) {
- case AliasAnalysis::MustAlias:
- Changed = true;
- // If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
- CI->setTailCall();
- }
- // Zap the fully redundant load.
- Call->replaceAllUsesWith(EarlierCall);
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_StoreWeak:
- case IC_InitWeak: {
- // If this is storing to the same pointer and has the same size etc.
- // replace this load's value with the stored value.
- CallInst *Call = cast<CallInst>(Inst);
- CallInst *EarlierCall = cast<CallInst>(EarlierInst);
- Value *Arg = Call->getArgOperand(0);
- Value *EarlierArg = EarlierCall->getArgOperand(0);
- switch (PA.getAA()->alias(Arg, EarlierArg)) {
- case AliasAnalysis::MustAlias:
- Changed = true;
- // If the load has a builtin retain, insert a plain retain for it.
- if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
- CI->setTailCall();
- }
- // Zap the fully redundant load.
- Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_MoveWeak:
- case IC_CopyWeak:
- // TOOD: Grab the copied value.
- goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_User:
- // Weak pointers are only modified through the weak entry points
- // (and arbitrary calls, which could call the weak entry points).
- break;
- default:
- // Anything else could modify the weak pointer.
- goto clobbered;
- }
- }
- clobbered:;
- }
-
- // Then, for each destroyWeak with an alloca operand, check to see if
- // the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
- continue;
-
- CallInst *Call = cast<CallInst>(Inst);
- Value *Arg = Call->getArgOperand(0);
- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ++UI) {
- const Instruction *UserInst = cast<Instruction>(*UI);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
- continue;
- default:
- goto done;
- }
- }
- Changed = true;
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ) {
- CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- // These functions return their second argument.
- UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
- break;
- case IC_DestroyWeak:
- // No return value.
- break;
- default:
- llvm_unreachable("alloca really is used!");
- }
- UserInst->eraseFromParent();
- }
- Alloca->eraseFromParent();
- done:;
- }
- }
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
-}
-
-/// OptimizeSequences - Identify program paths which execute sequences of
-/// retains and releases which can be eliminated.
-bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
- DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
-
- /// BBStates, This is used during the traversal of the function to track the
- /// states for each identified object at each block.
- DenseMap<const BasicBlock *, BBState> BBStates;
-
- // Analyze the CFG of the function, and all instructions.
- bool NestingDetected = Visit(F, BBStates, Retains, Releases);
-
- // Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
-}
-
-/// OptimizeReturns - Look for this pattern:
-/// \code
-/// %call = call i8* @something(...)
-/// %2 = call i8* @objc_retain(i8* %call)
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// And delete the retain and autorelease.
-///
-/// Otherwise if it's just this:
-/// \code
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// convert the autorelease to autoreleaseRV.
-void ObjCARCOpt::OptimizeReturns(Function &F) {
- if (!F.getReturnType()->isPointerTy())
- return;
-
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- BasicBlock *BB = FI;
- ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
-
- if (!Ret) continue;
-
- const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
- FindDependencies(NeedsPositiveRetainCount, Arg,
- BB, Ret, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- if (!Autorelease)
- goto next_block;
- InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
- if (!IsAutorelease(AutoreleaseClass))
- goto next_block;
- if (GetObjCArg(Autorelease) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference
- // count between the autorelease and the retain.
- FindDependencies(CanChangeRetainCount, Arg,
- BB, Autorelease, DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that we found a retain with the same argument.
- if (!Retain ||
- !IsRetain(GetBasicInstructionClass(Retain)) ||
- GetObjCArg(Retain) != Arg)
- goto next_block;
-
- DependingInstructions.clear();
- Visited.clear();
-
- // Convert the autorelease to an autoreleaseRV, since it's
- // returning the value.
- if (AutoreleaseClass == IC_Autorelease) {
- Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
- AutoreleaseClass = IC_AutoreleaseRV;
- }
-
- // Check that there is nothing that can affect the reference
- // count between the retain and the call.
- // Note that Retain need not be in BB.
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
- DependingInstructions, Visited, PA);
- if (DependingInstructions.size() != 1)
- goto next_block;
-
- {
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
-
- // Check that the pointer is the return value of the call.
- if (!Call || Arg != Call)
- goto next_block;
-
- // Check that the call is a regular call.
- InstructionClass Class = GetBasicInstructionClass(Call);
- if (Class != IC_CallOrUser && Class != IC_Call)
- goto next_block;
-
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
- << "\n Erasing: "
- << *Autorelease << "\n");
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
-
- next_block:
- DependingInstructions.clear();
- Visited.clear();
- }
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
-
-}
-
-bool ObjCARCOpt::doInitialization(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- // Identify the imprecise release metadata kind.
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-
- // Intuitively, objc_retain and others are nocapture, however in practice
- // they are not, because they return their argument value. And objc_release
- // calls finalizers which can have arbitrary side effects.
-
- // These are initialized lazily.
- RetainRVCallee = 0;
- AutoreleaseRVCallee = 0;
- ReleaseCallee = 0;
- RetainCallee = 0;
- RetainBlockCallee = 0;
- AutoreleaseCallee = 0;
-
- return false;
-}
-
-bool ObjCARCOpt::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // This pass performs several distinct transformations. As a compile-time aid
- // when compiling code that isn't ObjC, skip these if the relevant ObjC
- // library functions aren't declared.
-
- // Preliminary optimizations. This also computs UsedInThisFunction.
- OptimizeIndividualCalls(F);
-
- // Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
- OptimizeWeakCalls(F);
-
- // Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
- // Run OptimizeSequences until it either stops making changes or
- // no retain+release pair nesting is detected.
- while (OptimizeSequences(F)) {}
-
- // Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
- OptimizeReturns(F);
-
- return Changed;
-}
-
-void ObjCARCOpt::releaseMemory() {
- PA.clear();
-}
-
-//===----------------------------------------------------------------------===//
-// ARC contraction.
-//===----------------------------------------------------------------------===//
-
-// TODO: ObjCARCContract could insert PHI nodes when uses aren't
-// dominated by single calls.
-
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Operator.h"
-
-STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
-
-namespace {
- /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
- /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
- class ObjCARCContract : public FunctionPass {
- bool Changed;
- AliasAnalysis *AA;
- DominatorTree *DT;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// StoreStrongCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *StoreStrongCallee,
- *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
-
- /// RetainRVMarker - The inline asm string to insert between calls and
- /// RetainRV calls to make the optimization work on targets which need it.
- const MDString *RetainRVMarker;
-
- /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
- /// at the end of walking the function we have found no alloca
- /// instructions, these calls can be marked "tail".
- SmallPtrSet<CallInst *, 8> StoreStrongCalls;
-
- Constant *getStoreStrongCallee(Module *M);
- Constant *getRetainAutoreleaseCallee(Module *M);
- Constant *getRetainAutoreleaseRVCallee(Module *M);
-
- bool ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
- &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
- &Visited);
-
- void ContractRelease(Instruction *Release,
- inst_iterator &Iter);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
-
- public:
- static char ID;
- ObjCARCContract() : FunctionPass(ID) {
- initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
- }
- };
-}
-
-char ObjCARCContract::ID = 0;
-INITIALIZE_PASS_BEGIN(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
-INITIALIZE_PASS_END(ObjCARCContract,
- "objc-arc-contract", "ObjC ARC contraction", false, false)
-
-Pass *llvm::createObjCARCContractPass() {
- return new ObjCARCContract();
-}
-
-void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<DominatorTree>();
- AU.setPreservesCFG();
-}
-
-Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
- if (!StoreStrongCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = { I8XX, I8X };
-
- AttributeSet Attribute = AttributeSet()
- .addAttr(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::get(C, Attribute::NoUnwind))
- .addAttr(M->getContext(), 1, Attribute::get(C, Attribute::NoCapture));
-
- StoreStrongCallee =
- M->getOrInsertFunction(
- "objc_storeStrong",
- FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
- Attribute);
- }
- return StoreStrongCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
- if (!RetainAutoreleaseCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::get(C, Attribute::NoUnwind));
- RetainAutoreleaseCallee =
- M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute);
- }
- return RetainAutoreleaseCallee;
-}
-
-Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
- if (!RetainAutoreleaseRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::get(C, Attribute::NoUnwind));
- RetainAutoreleaseRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
- Attribute);
- }
- return RetainAutoreleaseRVCallee;
-}
-
-/// ContractAutorelease - Merge an autorelease with a retain into a fused call.
-bool
-ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
- InstructionClass Class,
- SmallPtrSet<Instruction *, 4>
- &DependingInstructions,
- SmallPtrSet<const BasicBlock *, 4>
- &Visited) {
- const Value *Arg = GetObjCArg(Autorelease);
-
- // Check that there are no instructions between the retain and the autorelease
- // (such as an autorelease_pop) which may change the count.
- CallInst *Retain = 0;
- if (Class == IC_AutoreleaseRV)
- FindDependencies(RetainAutoreleaseRVDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
- else
- FindDependencies(RetainAutoreleaseDep, Arg,
- Autorelease->getParent(), Autorelease,
- DependingInstructions, Visited, PA);
-
- Visited.clear();
- if (DependingInstructions.size() != 1) {
- DependingInstructions.clear();
- return false;
- }
-
- Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
- DependingInstructions.clear();
-
- if (!Retain ||
- GetBasicInstructionClass(Retain) != IC_Retain ||
- GetObjCArg(Retain) != Arg)
- return false;
-
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing "
- "retain/autorelease. Erasing: " << *Autorelease << "\n"
- " Old Retain: "
- << *Retain << "\n");
-
- if (Class == IC_AutoreleaseRV)
- Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
- else
- Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
-
- DEBUG(dbgs() << " New Retain: "
- << *Retain << "\n");
-
- EraseInstruction(Autorelease);
- return true;
-}
-
-/// ContractRelease - Attempt to merge an objc_release with a store, load, and
-/// objc_retain to form an objc_storeStrong. This can be a little tricky because
-/// the instructions don't always appear in order, and there may be unrelated
-/// intervening instructions.
-void ObjCARCContract::ContractRelease(Instruction *Release,
- inst_iterator &Iter) {
- LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
- if (!Load || !Load->isSimple()) return;
-
- // For now, require everything to be in one basic block.
- BasicBlock *BB = Release->getParent();
- if (Load->getParent() != BB) return;
-
- // Walk down to find the store and the release, which may be in either order.
- BasicBlock::iterator I = Load, End = BB->end();
- ++I;
- AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = 0;
- bool SawRelease = false;
- for (; !Store || !SawRelease; ++I) {
- if (I == End)
- return;
-
- Instruction *Inst = I;
- if (Inst == Release) {
- SawRelease = true;
- continue;
- }
-
- InstructionClass Class = GetBasicInstructionClass(Inst);
-
- // Unrelated retains are harmless.
- if (IsRetain(Class))
- continue;
-
- if (Store) {
- // The store is the point where we're going to put the objc_storeStrong,
- // so make sure there are no uses after it.
- if (CanUse(Inst, Load, PA, Class))
- return;
- } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) {
- // We are moving the load down to the store, so check for anything
- // else which writes to the memory between the load and the store.
- Store = dyn_cast<StoreInst>(Inst);
- if (!Store || !Store->isSimple()) return;
- if (Store->getPointerOperand() != Loc.Ptr) return;
- }
- }
-
- Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
-
- // Walk up to find the retain.
- I = Store;
- BasicBlock::iterator Begin = BB->begin();
- while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
- --I;
- Instruction *Retain = I;
- if (GetBasicInstructionClass(Retain) != IC_Retain) return;
- if (GetObjCArg(Retain) != New) return;
-
- Changed = true;
- ++NumStoreStrongs;
-
- LLVMContext &C = Release->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
-
- Value *Args[] = { Load->getPointerOperand(), New };
- if (Args[0]->getType() != I8XX)
- Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
- if (Args[1]->getType() != I8X)
- Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- CallInst *StoreStrong =
- CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
- Args, "", Store);
- StoreStrong->setDoesNotThrow();
- StoreStrong->setDebugLoc(Store->getDebugLoc());
-
- // We can't set the tail flag yet, because we haven't yet determined
- // whether there are any escaping allocas. Remember this call, so that
- // we can set the tail flag once we know it's safe.
- StoreStrongCalls.insert(StoreStrong);
-
- if (&*Iter == Store) ++Iter;
- Store->eraseFromParent();
- Release->eraseFromParent();
- EraseInstruction(Retain);
- if (Load->use_empty())
- Load->eraseFromParent();
-}
-
-bool ObjCARCContract::doInitialization(Module &M) {
- // If nothing in the Module uses ARC, don't do anything.
- Run = ModuleHasARC(M);
- if (!Run)
- return false;
-
- // These are initialized lazily.
- StoreStrongCallee = 0;
- RetainAutoreleaseCallee = 0;
- RetainAutoreleaseRVCallee = 0;
-
- // Initialize RetainRVMarker.
- RetainRVMarker = 0;
- if (NamedMDNode *NMD =
- M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
- if (NMD->getNumOperands() == 1) {
- const MDNode *N = NMD->getOperand(0);
- if (N->getNumOperands() == 1)
- if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
- RetainRVMarker = S;
- }
-
- return false;
-}
-
-bool ObjCARCContract::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
- AA = &getAnalysis<AliasAnalysis>();
- DT = &getAnalysis<DominatorTree>();
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // Track whether it's ok to mark objc_storeStrong calls with the "tail"
- // keyword. Be conservative if the function has variadic arguments.
- // It seems that functions which "return twice" are also unsafe for the
- // "tail" argument, because they are setjmp, which could need to
- // return to an earlier stack state.
- bool TailOkForStoreStrongs = !F.isVarArg() &&
- !F.callsFunctionThatReturnsTwice();
-
- // For ObjC library calls which return their argument, replace uses of the
- // argument with uses of the call return value, if it dominates the use. This
- // reduces register pressure.
- SmallPtrSet<Instruction *, 4> DependingInstructions;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
-
- DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");
-
- // Only these library routines return their argument. In particular,
- // objc_retainBlock does not necessarily return its argument.
- InstructionClass Class = GetBasicInstructionClass(Inst);
- switch (Class) {
- case IC_Retain:
- case IC_FusedRetainAutorelease:
- case IC_FusedRetainAutoreleaseRV:
- break;
- case IC_Autorelease:
- case IC_AutoreleaseRV:
- if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
- continue;
- break;
- case IC_RetainRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the retainAutoreleasedReturnValue optimization,
- // insert it now.
- if (!RetainRVMarker)
- break;
- BasicBlock::iterator BBI = Inst;
- BasicBlock *InstParent = Inst->getParent();
-
- // Step up to see if the call immediately precedes the RetainRV call.
- // If it's an invoke, we have to cross a block boundary. And we have
- // to carefully dodge no-op instructions.
- do {
- if (&*BBI == InstParent->begin()) {
- BasicBlock *Pred = InstParent->getSinglePredecessor();
- if (!Pred)
- goto decline_rv_optimization;
- BBI = Pred->getTerminator();
- break;
- }
- --BBI;
- } while (isNoopInstruction(BBI));
-
- if (&*BBI == GetObjCArg(Inst)) {
- DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
- "retainAutoreleasedReturnValue optimization.\n");
- Changed = true;
- InlineAsm *IA =
- InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
- /*isVarArg=*/false),
- RetainRVMarker->getString(),
- /*Constraints=*/"", /*hasSideEffects=*/true);
- CallInst::Create(IA, "", Inst);
- }
- decline_rv_optimization:
- break;
- }
- case IC_InitWeak: {
- // objc_initWeak(p, null) => *p = null
- CallInst *CI = cast<CallInst>(Inst);
- if (isNullOrUndef(CI->getArgOperand(1))) {
- Value *Null =
- ConstantPointerNull::get(cast<PointerType>(CI->getType()));
- Changed = true;
- new StoreInst(Null, CI->getArgOperand(0), CI);
-
- DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
- << " New = " << *Null << "\n");
-
- CI->replaceAllUsesWith(Null);
- CI->eraseFromParent();
- }
- continue;
- }
- case IC_Release:
- ContractRelease(Inst, I);
- continue;
- case IC_User:
- // Be conservative if the function has any alloca instructions.
- // Technically we only care about escaping alloca instructions,
- // but this is sufficient to handle some interesting cases.
- if (isa<AllocaInst>(Inst))
- TailOkForStoreStrongs = false;
- continue;
- default:
- continue;
- }
-
- DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");
-
- // Don't use GetObjCArg because we don't want to look through bitcasts
- // and such; to do the replacement, the argument must have type i8*.
- const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
- for (;;) {
- // If we're compiling bugpointed code, don't get in trouble.
- if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
- break;
- // Look through the uses of the pointer.
- for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
- UI != UE; ) {
- Use &U = UI.getUse();
- unsigned OperandNo = UI.getOperandNo();
- ++UI; // Increment UI now, because we may unlink its element.
-
- // If the call's return value dominates a use of the call's argument
- // value, rewrite the use to use the return value. We check for
- // reachability here because an unreachable call is considered to
- // trivially dominate itself, which would lead us to rewriting its
- // argument in terms of its return value, which would lead to
- // infinite loops in GetObjCArg.
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
- Changed = true;
- Instruction *Replacement = Inst;
- Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
- // For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB = PHI->getIncomingBlock(ValNo);
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- &BB->back());
- // While we're here, rewrite all edges for this PHI, rather
- // than just one use at a time, to minimize the number of
- // bitcasts we emit.
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingBlock(i) == BB) {
- // Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
- ++UI;
- PHI->setIncomingValue(i, Replacement);
- }
- } else {
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- cast<Instruction>(U.getUser()));
- U.set(Replacement);
- }
- }
- }
-
- // If Arg is a no-op casted pointer, strip one level of casts and iterate.
- if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
- Arg = BI->getOperand(0);
- else if (isa<GEPOperator>(Arg) &&
- cast<GEPOperator>(Arg)->hasAllZeroIndices())
- Arg = cast<GEPOperator>(Arg)->getPointerOperand();
- else if (isa<GlobalAlias>(Arg) &&
- !cast<GlobalAlias>(Arg)->mayBeOverridden())
- Arg = cast<GlobalAlias>(Arg)->getAliasee();
- else
- break;
- }
- }
-
- // If this function has no escaping allocas or suspicious vararg usage,
- // objc_storeStrong calls can be marked with the "tail" keyword.
- if (TailOkForStoreStrongs)
- for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
- E = StoreStrongCalls.end(); I != E; ++I)
- (*I)->setTailCall();
- StoreStrongCalls.clear();
-
- return Changed;
-}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 3e935d8..e30a274 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -271,13 +271,6 @@ public:
return I->second;
}
- /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
- DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
- StructValueState.find(std::make_pair(V, i));
- assert(I != StructValueState.end() && "V is not in valuemap!");
- return I->second;
- }*/
-
/// getTrackedRetVals - Get the inferred return value map.
///
const DenseMap<Function*, LatticeVal> &getTrackedRetVals() {
@@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
markConstant(&PN, OperandVal); // Acquire operand value
}
-
-
-
void SCCPSolver::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() == 0) return; // ret void
@@ -1185,7 +1175,7 @@ void SCCPSolver::Solve() {
DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
// "I" got into the work list because it either made the transition from
- // bottom to constant
+ // bottom to constant, or to overdefined.
//
// Anything on this worklist that is overdefined need not be visited
// since all of its users will have already been marked as overdefined
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 4204171..e90fe90 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -43,14 +43,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/InstVisitor.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -411,9 +409,9 @@ static Value *foldSelectInst(SelectInst &SI) {
// early on.
if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
return SI.getOperand(1+CI->isZero());
- if (SI.getOperand(1) == SI.getOperand(2)) {
+ if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
- }
+
return 0;
}
@@ -621,7 +619,7 @@ private:
}
// Disable SRoA for any intrinsics except for lifetime invariants.
- // FIXME: What about debug instrinsics? This matches old behavior, but
+ // FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
if (!IsOffsetKnown)
@@ -1141,8 +1139,7 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
- for (const_use_iterator UI = use_begin(I), UE = use_end(I);
- UI != UE; ++UI) {
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
if (!UI->U)
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
@@ -1170,8 +1167,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const {
}
OS << "Partitioning of alloca: " << AI << "\n";
- unsigned Num = 0;
- for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
print(OS, I);
printUsers(OS, I);
}
@@ -1242,7 +1238,7 @@ public:
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -1277,7 +1273,7 @@ namespace {
/// 1) It takes allocations of aggregates and analyzes the ways in which they
/// are used to try to split them into smaller allocations, ideally of
/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate invidual scalar accesses.
+/// as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
/// promotion. This can be replacing a memset with a scalar store of an
/// integer value, or it can involve speculating operations on a PHI or
@@ -1439,8 +1435,7 @@ private:
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
- ++Idx) {
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
@@ -1483,7 +1478,7 @@ private:
PN.getName() + ".sroa.speculated");
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ, it doesn't matter.
+ // matter which one we get and if any differ.
LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
@@ -1816,7 +1811,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
-/// properities. The algorithm tries to fold as many constant indices into
+/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
@@ -2062,9 +2057,9 @@ static bool isIntegerWideningViable(const DataLayout &TD,
uint64_t Size = TD.getTypeStoreSize(AllocaTy);
- // Check the uses to ensure the uses are (likely) promoteable integer uses.
+ // Check the uses to ensure the uses are (likely) promotable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
- // to widen the integer operotains only to fail to promote due to some other
+ // to widen the integer operations only to fail to promote due to some other
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
@@ -2283,7 +2278,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
- // non-null, we have some strict guarantees about the rewriten alloca:
+ // non-null, we have some strict guarantees about the rewritten alloca:
// - The new alloca is exactly the size of the vector type here.
// - The accesses all either map to the entire vector or to a single
// element.
@@ -2636,7 +2631,7 @@ private:
///
/// Note that this routine assumes an i8 is a byte. If that isn't true, don't
/// call this routine.
- /// FIXME: Heed the abvice above.
+ /// FIXME: Heed the advice above.
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
@@ -2971,6 +2966,7 @@ private:
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
+ (void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
@@ -3147,9 +3143,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices,
- Name + ".gep"),
- Name + ".load");
+ Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
}
@@ -3422,7 +3417,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
- // performe phi and select speculation.
+ // perform phi and select speculation.
AllocaInst *NewAI;
if (AllocaTy == AI.getAllocatedType()) {
assert(PI->BeginOffset == 0 &&
@@ -3589,7 +3584,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
/// If there is a domtree available, we attempt to promote using the full power
/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occured.
+/// promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 35d2fa0..8a9c7da 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerAtomicPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
- initializeObjCARCAliasAnalysisPass(Registry);
- initializeObjCARCAPElimPass(Registry);
- initializeObjCARCExpandPass(Registry);
- initializeObjCARCContractPass(Registry);
- initializeObjCARCOptPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d5cefa3..916b37d 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -165,7 +165,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
// Ignore non-calls.
CallInst *CI = dyn_cast<CallInst>(I++);
- if (!CI) continue;
+ if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue;
// Ignore indirect calls and calls to non-external functions.
Function *Callee = CI->getCalledFunction();
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6572e09..2002e68 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -58,6 +58,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
initializeTailCallElimPass(*PassRegistry::getPassRegistry());
}
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
virtual bool runOnFunction(Function &F);
private:
@@ -109,14 +114,21 @@ namespace {
}
char TailCallElim::ID = 0;
-INITIALIZE_PASS(TailCallElim, "tailcallelim",
- "Tail Call Elimination", false, false)
+INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false)
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
+void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+}
+
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) {
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
+ TTI = &getAnalysis<TargetTransformInfo>();
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
@@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI,
if (BB == &F->getEntryBlock() &&
FirstNonDbg(BB->front()) == CI &&
FirstNonDbg(llvm::next(BB->begin())) == TI &&
- callIsSmall(CI)) {
+ CI->getCalledFunction() &&
+ !TTI->isLoweredToCall(CI->getCalledFunction())) {
// A single-block function with just a call and a return. Check that
// the arguments match.
CallSite::arg_iterator I = CallSite(CI).arg_begin(),
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 8330e84..ba99d2e 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -37,12 +37,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
// Can delete self loop.
BB->getSinglePredecessor() == BB) && "Block is not dead!");
TerminatorInst *BBTerm = BB->getTerminator();
-
+
// Loop through all of our successors and make sure they know that one
// of their predecessors is going away.
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i)
BBTerm->getSuccessor(i)->removePredecessor(BB);
-
+
// Zap all the instructions in the block.
while (!BB->empty()) {
Instruction &I = BB->back();
@@ -55,7 +55,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
I.replaceAllUsesWith(UndefValue::get(I.getType()));
BB->getInstList().pop_back();
}
-
+
// Zap the block!
BB->eraseFromParent();
}
@@ -66,25 +66,25 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
/// when the block has exactly one predecessor.
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
if (!isa<PHINode>(BB->begin())) return;
-
+
AliasAnalysis *AA = 0;
MemoryDependenceAnalysis *MemDep = 0;
if (P) {
AA = P->getAnalysisIfAvailable<AliasAnalysis>();
MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
}
-
+
while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
if (PN->getIncomingValue(0) != PN)
PN->replaceAllUsesWith(PN->getIncomingValue(0));
else
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
-
+
if (MemDep)
MemDep->removeInstruction(PN); // Memdep updates AA itself.
else if (AA && isa<PointerType>(PN->getType()))
AA->deleteValue(PN);
-
+
PN->eraseFromParent();
}
}
@@ -115,7 +115,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Don't merge away blocks who have their address taken.
if (BB->hasAddressTaken()) return false;
-
+
// Can't merge if there are multiple predecessors, or no predecessors.
BasicBlock *PredBB = BB->getUniquePredecessor();
if (!PredBB) return false;
@@ -124,7 +124,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
if (PredBB == BB) return false;
// Don't break invokes.
if (isa<InvokeInst>(PredBB->getTerminator())) return false;
-
+
succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB));
BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
@@ -132,7 +132,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
OnlySucc = 0; // There are multiple distinct successors!
break;
}
-
+
// Can't merge if there are multiple successors.
if (!OnlySucc) return false;
@@ -149,21 +149,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Begin by getting rid of unneeded PHIs.
if (isa<PHINode>(BB->front()))
FoldSingleEntryPHINodes(BB, P);
-
+
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
-
+
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
-
+
// Move all definitions in the successor to the predecessor...
PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-
+
// Inherit predecessors name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
-
+
// Finally, erase the old block and update dominator info.
if (P) {
if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) {
@@ -176,16 +176,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
DT->eraseNode(BB);
}
-
+
if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>())
LI->removeBlock(BB);
-
+
if (MemoryDependenceAnalysis *MD =
P->getAnalysisIfAvailable<MemoryDependenceAnalysis>())
MD->invalidateCachedPredecessors();
}
}
-
+
BB->eraseFromParent();
return true;
}
@@ -251,11 +251,11 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
}
}
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
+/// SplitEdge - Split the edge connecting specified block. Pass P must
+/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
unsigned SuccNum = GetSuccessorNumber(BB, Succ);
-
+
// If this is a critical edge, let SplitCriticalEdge do it.
TerminatorInst *LatchTerm = BB->getTerminator();
if (SplitCriticalEdge(LatchTerm, SuccNum, P))
@@ -271,11 +271,11 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
SP = NULL;
return SplitBlock(Succ, Succ->begin(), P);
}
-
+
// Otherwise, if BB has a single successor, split it at the bottom of the
// block.
assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
+ "Should have a single succ!");
return SplitBlock(BB, BB->getTerminator(), P);
}
@@ -301,12 +301,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
if (DomTreeNode *OldNode = DT->getNode(Old)) {
std::vector<DomTreeNode *> Children;
for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
+ I != E; ++I)
Children.push_back(*I);
DomTreeNode *NewNode = DT->addNewBlock(New,Old);
for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
+ E = Children.end(); I != E; ++I)
DT->changeImmediateDominator(*I, NewNode);
}
}
@@ -424,7 +424,7 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
PHINode *NewPHI =
PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
if (AA) AA->copyValue(PN, NewPHI);
-
+
// Move all of the PHI values for 'Preds' to the new PHI.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
Value *V = PN->removeIncomingValue(Preds[i], false);
@@ -451,16 +451,16 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// preserve LoopSimplify (because it's complicated to handle the case where one
/// of the edges being split is an exit of a loop with other exits).
///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
ArrayRef<BasicBlock*> Preds,
const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
-
+
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
-
+
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
@@ -497,13 +497,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
/// block gets the remaining predecessors of OrigBB. The landingpad instruction
/// OrigBB is clone into both of the new basic blocks. The new blocks are given
/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
-///
+///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
/// it does not preserve LoopSimplify (because it's complicated to handle the
/// case where one of the edges being split is an exit of a loop with other
/// exits).
-///
+///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
ArrayRef<BasicBlock*> Preds,
const char *Suffix1, const char *Suffix2,
@@ -608,11 +608,11 @@ void llvm::FindFunctionBackedges(const Function &F,
const BasicBlock *BB = &F.getEntryBlock();
if (succ_begin(BB) == succ_end(BB))
return;
-
+
SmallPtrSet<const BasicBlock*, 8> Visited;
SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
SmallPtrSet<const BasicBlock*, 8> InStack;
-
+
Visited.insert(BB);
VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
InStack.insert(BB);
@@ -620,7 +620,7 @@ void llvm::FindFunctionBackedges(const Function &F,
std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
const BasicBlock *ParentBB = Top.first;
succ_const_iterator &I = Top.second;
-
+
bool FoundNew = false;
while (I != succ_end(ParentBB)) {
BB = *I++;
@@ -632,7 +632,7 @@ void llvm::FindFunctionBackedges(const Function &F,
if (InStack.count(BB))
Result.push_back(std::make_pair(ParentBB, BB));
}
-
+
if (FoundNew) {
// Go down one level if there is a unvisited successor.
InStack.insert(BB);
@@ -641,7 +641,7 @@ void llvm::FindFunctionBackedges(const Function &F,
// Go up one level.
InStack.erase(VisitStack.pop_back_val().first);
}
- } while (!VisitStack.empty());
+ } while (!VisitStack.empty());
}
/// FoldReturnIntoUncondBranch - This method duplicates the specified return
@@ -655,7 +655,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
// Clone the return and add it to the end of the predecessor.
Instruction *NewRet = RI->clone();
Pred->getInstList().push_back(NewRet);
-
+
// If the return instruction returns a value, and if the value was a
// PHI node in "BB", propagate the right value into the return.
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
@@ -679,7 +679,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
}
}
}
-
+
// Update any PHI nodes in the returning block to realize that we no
// longer branch to them.
BB->removePredecessor(Pred);
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index bf540b0..6d13217 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -38,16 +38,16 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen",
AttributeSet::get(M->getContext(),
- AWI),
+ AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
NULL);
@@ -67,16 +67,16 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrNLen = M->getOrInsertFunction("strnlen",
AttributeSet::get(M->getContext(),
- AWI),
+ AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
@@ -98,15 +98,15 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AttributeWithIndex AWI =
- AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AttributeSet AS =
+ AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
Constant *StrChr = M->getOrInsertFunction("strchr",
AttributeSet::get(M->getContext(),
- AWI),
+ AS),
I8Ptr, I8Ptr, I32Ty, NULL);
CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
ConstantInt::get(I32Ty, C), "strchr");
@@ -123,17 +123,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp",
AttributeSet::get(M->getContext(),
- AWI),
+ AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -156,13 +156,13 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrCpy = M->getOrInsertFunction(Name,
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
I8Ptr, I8Ptr, I8Ptr, NULL);
CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
Name);
@@ -180,14 +180,14 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name,
AttributeSet::get(M->getContext(),
- AWI),
+ AS),
I8Ptr, I8Ptr, I8Ptr,
Len->getType(), NULL);
CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
@@ -207,12 +207,12 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI;
- AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS;
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -235,13 +235,13 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI;
+ AttributeSet AS;
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr",
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
B.getInt32Ty(),
@@ -263,16 +263,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- ArrayRef<Attribute::AttrKind>(AVs, 2));
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ ArrayRef<Attribute::AttrKind>(AVs, 2));
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp",
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
B.getInt8PtrTy(),
@@ -344,13 +344,13 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Value *PutS = M->getOrInsertFunction("puts",
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
NULL);
@@ -368,14 +368,14 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[2];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[2];
+ AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputc",
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt32Ty(), File->getType(),
NULL);
@@ -401,16 +401,16 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FPutsName,
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
@@ -434,17 +434,17 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
return 0;
Module *M = B.GetInsertBlock()->getParent()->getParent();
- AttributeWithIndex AWI[3];
- AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture);
- AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attribute::NoCapture);
- AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeSet AS[3];
+ AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture);
+ AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture);
+ AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
F = M->getOrInsertFunction(FWriteName,
- AttributeSet::get(M->getContext(), AWI),
+ AttributeSet::get(M->getContext(), AS),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index ccc3eae..a309bce 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -95,18 +95,16 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
- Anew->addAttr( OldFunc->getAttributes()
+ Anew->addAttr(OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldFunc->getAttributes()
- .getRetAttributes()));
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldFunc->getAttributes()));
NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttr(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldFunc->getAttributes()
- .getFnAttributes()));
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldFunc->getAttributes()));
}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 3a21528..f7c659f 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index d5c41f5..db525cd 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
@@ -78,12 +79,21 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
InsertPt = &I;
++InsertPt;
} else {
- // We cannot demote invoke instructions to the stack if their normal edge
- // is critical.
InvokeInst &II = cast<InvokeInst>(I);
- assert(II.getNormalDest()->getSinglePredecessor() &&
- "Cannot demote invoke with a critical successor!");
- InsertPt = II.getNormalDest()->begin();
+ if (II.getNormalDest()->getSinglePredecessor())
+ InsertPt = II.getNormalDest()->getFirstInsertionPt();
+ else {
+ // We cannot demote invoke instructions to the stack if their normal edge
+ // is critical. Therefore, split the critical edge and insert the store
+ // in the newly created basic block.
+ unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest());
+ TerminatorInst *TI = &cast<TerminatorInst>(I);
+ assert (isCriticalEdge(TI, SuccNum) &&
+ "Expected a critical edge!");
+ BasicBlock *BB = SplitCriticalEdge(TI, SuccNum);
+ assert (BB && "Unable to split critical edge.");
+ InsertPt = BB->getFirstInsertionPt();
+ }
}
for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 5187d7c..3cb8ded 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -418,3 +418,107 @@ bool llvm::expandDivision(BinaryOperator *Div) {
return true;
}
+
+/// Generate code to compute the remainder of two integers of bitwidth up to
+/// 32 bits. Uses the above routines and extends the inputs/truncates the
+/// outputs to operate in 32 bits; that is, these routines are good for targets
+/// that have no or very little support for smaller than 32 bit integer
+/// arithmetic.
+///
+/// @brief Replace Rem with emulation code.
+bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {
+ assert((Rem->getOpcode() == Instruction::SRem ||
+ Rem->getOpcode() == Instruction::URem) &&
+ "Trying to expand remainder from a non-remainder function");
+
+ Type *RemTy = Rem->getType();
+ if (RemTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned RemTyBitWidth = RemTy->getIntegerBitWidth();
+
+ if (RemTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (RemTyBitWidth == 32)
+ return expandRemainder(Rem);
+
+ // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // with 32 bit remainder.
+ IRBuilder<> Builder(Rem);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtRem;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Rem->getOpcode() == Instruction::SRem) {
+ ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty);
+ ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtRem, RemTy);
+
+ Rem->replaceAllUsesWith(Trunc);
+ Rem->dropAllReferences();
+ Rem->eraseFromParent();
+
+ return expandRemainder(cast<BinaryOperator>(ExtRem));
+}
+
+
+/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the
+/// above routines and extends the inputs/truncates the outputs to operate
+/// in 32 bits; that is, these routines are good for targets that have no
+/// or very little support for smaller than 32 bit integer arithmetic.
+///
+/// @brief Replace Div with emulation code.
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {
+ assert((Div->getOpcode() == Instruction::SDiv ||
+ Div->getOpcode() == Instruction::UDiv) &&
+ "Trying to expand division from a non-division function");
+
+ Type *DivTy = Div->getType();
+ if (DivTy->isVectorTy())
+ llvm_unreachable("Div over vectors not supported");
+
+ unsigned DivTyBitWidth = DivTy->getIntegerBitWidth();
+
+ if (DivTyBitWidth > 32)
+ llvm_unreachable("Div of bitwidth greater than 32 not supported");
+
+ if (DivTyBitWidth == 32)
+ return expandDivision(Div);
+
+ // If bitwidth smaller than 32 extend inputs, truncate output and proceed
+ // with 32 bit division.
+ IRBuilder<> Builder(Div);
+
+ Value *ExtDividend;
+ Value *ExtDivisor;
+ Value *ExtDiv;
+ Value *Trunc;
+ Type *Int32Ty = Builder.getInt32Ty();
+
+ if (Div->getOpcode() == Instruction::SDiv) {
+ ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor);
+ } else {
+ ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty);
+ ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty);
+ ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor);
+ }
+ Trunc = Builder.CreateTrunc(ExtDiv, DivTy);
+
+ Div->replaceAllUsesWith(Trunc);
+ Div->dropAllReferences();
+ Div->eraseFromParent();
+
+ return expandDivision(cast<BinaryOperator>(ExtDiv));
+}
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index d519fb7..3716f58 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -72,13 +72,23 @@ namespace {
// Rename all aliases
for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE; ++AI)
- AI->setName("alias");
+ AI != AE; ++AI) {
+ StringRef Name = AI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+ AI->setName("alias");
+ }
+
// Rename all global variables
for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
- GI != GE; ++GI)
+ GI != GE; ++GI) {
+ StringRef Name = GI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
GI->setName("global");
+ }
// Rename all struct types
TypeFinder StructTypes;
@@ -95,6 +105,10 @@ namespace {
// Rename all functions
for (Module::iterator FI = M.begin(), FE = M.end();
FI != FE; ++FI) {
+ StringRef Name = FI->getName();
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ continue;
+
FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]);
runOnFunction(*FI);
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f10c35f..a63d31d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1332,149 +1332,180 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
-/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1
-/// and an BB2 and the only successor of BB1 is BB2, hoist simple code
-/// (for now, restricted to a single instruction that's side effect free) from
-/// the BB1 into the branch block to speculatively execute it.
+/// \brief Speculate a conditional basic block flattening the CFG.
///
-/// Turn
-/// BB:
-/// %t1 = icmp
-/// br i1 %t1, label %BB1, label %BB2
-/// BB1:
-/// %t3 = add %t2, c
+/// Note that this is a very risky transform currently. Speculating
+/// instructions like this is most often not desirable. Instead, there is an MI
+/// pass which can do it with full awareness of the resource constraints.
+/// However, some cases are "obvious" and we should handle them directly. An
+/// example of this is speculating a single, reasonably cheap instruction.
+///
+/// There is only one distinct advantage to flattening the CFG at the IR level:
+/// it makes very common but simplistic optimizations such as are common in
+/// instcombine and the DAG combiner more powerful by removing CFG edges and
+/// modeling their effects with easier to reason about SSA value graphs.
+///
+///
+/// An illustration of this transform is turning this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// %sub = sub %x, %y
/// br label BB2
-/// BB2:
-/// =>
-/// BB:
-/// %t1 = icmp
-/// %t4 = add %t2, c
-/// %t3 = select i1 %t1, %t2, %t3
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
- // Only speculatively execution a single instruction (not counting the
- // terminator) for now.
- Instruction *HInst = NULL;
- Instruction *Term = BB1->getTerminator();
- for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end();
+/// EndBB:
+/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
+/// ...
+/// \endcode
+///
+/// Into this IR:
+/// \code
+/// BB:
+/// %cmp = icmp ult %x, %y
+/// %sub = sub %x, %y
+/// %cond = select i1 %cmp, 0, %sub
+/// ...
+/// \endcode
+///
+/// \returns true if the conditional block is removed.
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+ // Be conservative for now. FP select instruction can often be expensive.
+ Value *BrCond = BI->getCondition();
+ if (isa<FCmpInst>(BrCond))
+ return false;
+
+ BasicBlock *BB = BI->getParent();
+ BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
+
+ // If ThenBB is actually on the false edge of the conditional branch, remember
+ // to swap the select operands later.
+ bool Invert = false;
+ if (ThenBB != BI->getSuccessor(0)) {
+ assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
+ Invert = true;
+ }
+ assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
+
+ // Keep a count of how many times instructions are used within ThenBB when
+ // they are candidates for sinking into ThenBB. Specifically:
+ // - They are defined in BB, and
+ // - They have no side effects, and
+ // - All of their uses are in ThenBB.
+ SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
+
+ unsigned SpeculationCost = 0;
+ for (BasicBlock::iterator BBI = ThenBB->begin(),
+ BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
Instruction *I = BBI;
// Skip debug info.
- if (isa<DbgInfoIntrinsic>(I)) continue;
- if (I == Term) break;
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
- if (HInst)
+ // Only speculatively execute a single instruction (not counting the
+ // terminator) for now.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
return false;
- HInst = I;
- }
-
- BasicBlock *BIParent = BI->getParent();
- // Check the instruction to be hoisted, if there is one.
- if (HInst) {
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(HInst))
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+ if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
// Do not hoist the instruction if any of its operands are defined but not
// used in this BB. The transformation will prevent the operand from
// being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
Instruction *OpI = dyn_cast<Instruction>(*i);
- if (OpI && OpI->getParent() == BIParent &&
- !OpI->mayHaveSideEffects() &&
- !OpI->isUsedInBasicBlock(BIParent))
- return false;
+ if (!OpI || OpI->getParent() != BB ||
+ OpI->mayHaveSideEffects())
+ continue; // Not a candidate for sinking.
+
+ ++SinkCandidateUseCounts[OpI];
}
}
- // Be conservative for now. FP select instruction can often be expensive.
- Value *BrCond = BI->getCondition();
- if (isa<FCmpInst>(BrCond))
- return false;
-
- // If BB1 is actually on the false edge of the conditional branch, remember
- // to swap the select operands later.
- bool Invert = false;
- if (BB1 != BI->getSuccessor(0)) {
- assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?");
- Invert = true;
- }
+ // Consider any sink candidates which are only used in ThenBB as costs for
+ // speculation. Note, while we iterate over a DenseMap here, we are summing
+ // and so iteration order isn't significant.
+ for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I =
+ SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end();
+ I != E; ++I)
+ if (I->first->getNumUses() == I->second) {
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
+ }
- // Collect interesting PHIs, and scan for hazards.
- SmallSetVector<std::pair<Value *, Value *>, 4> PHIs;
- BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
- for (BasicBlock::iterator I = BB2->begin();
+ // Check that the PHI nodes can be converted to selects.
+ bool HaveRewritablePHIs = false;
+ for (BasicBlock::iterator I = EndBB->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BIParentV = PN->getIncomingValueForBlock(BIParent);
+ Value *OrigV = PN->getIncomingValueForBlock(BB);
+ Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
// Skip PHIs which are trivial.
- if (BB1V == BIParentV)
+ if (ThenV == OrigV)
continue;
- // Check for safety.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
- // An unfolded ConstantExpr could end up getting expanded into
- // Instructions. Don't speculate this and another instruction at
- // the same time.
- if (HInst)
- return false;
- if (!isSafeToSpeculativelyExecute(CE))
- return false;
- if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
- return false;
- }
+ HaveRewritablePHIs = true;
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV);
+ if (!CE)
+ continue; // Known safe and cheap.
+
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+ if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ return false;
- // Ok, we may insert a select for this PHI.
- PHIs.insert(std::make_pair(BB1V, BIParentV));
+ // Account for the cost of an unfolded ConstantExpr which could end up
+ // getting expanded into Instructions.
+ // FIXME: This doesn't account for how many operations are combined in the
+ // constant expression.
+ ++SpeculationCost;
+ if (SpeculationCost > 1)
+ return false;
}
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (PHIs.empty())
+ if (!HaveRewritablePHIs)
return false;
// If we get here, we can hoist the instruction and if-convert.
- DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
- // Hoist the instruction.
- if (HInst)
- BIParent->getInstList().splice(BI, BB1->getInstList(), HInst);
+ // Hoist the instructions.
+ BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
+ llvm::prior(ThenBB->end()));
// Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
- Value *TrueV = PHIs[i].first;
- Value *FalseV = PHIs[i].second;
+ for (BasicBlock::iterator I = EndBB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned OrigI = PN->getBasicBlockIndex(BB);
+ unsigned ThenI = PN->getBasicBlockIndex(ThenBB);
+ Value *OrigV = PN->getIncomingValue(OrigI);
+ Value *ThenV = PN->getIncomingValue(ThenI);
+
+ // Skip PHIs which are trivial.
+ if (OrigV == ThenV)
+ continue;
// Create a select whose true value is the speculatively executed value and
- // false value is the previously determined FalseV.
- SelectInst *SI;
+ // false value is the preexisting value. Swap them if the branch
+ // destinations were inverted.
+ Value *TrueV = ThenV, *FalseV = OrigV;
if (Invert)
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, FalseV, TrueV,
- FalseV->getName() + "." + TrueV->getName()));
- else
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, TrueV, FalseV,
- TrueV->getName() + "." + FalseV->getName()));
-
- // Make the PHI node use the select for all incoming values for "then" and
- // "if" blocks.
- for (BasicBlock::iterator I = BB2->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- unsigned BB1I = PN->getBasicBlockIndex(BB1);
- unsigned BIParentI = PN->getBasicBlockIndex(BIParent);
- Value *BB1V = PN->getIncomingValue(BB1I);
- Value *BIParentV = PN->getIncomingValue(BIParentI);
- if (TrueV == BB1V && FalseV == BIParentV) {
- PN->setIncomingValue(BB1I, SI);
- PN->setIncomingValue(BIParentI, SI);
- }
- }
+ std::swap(TrueV, FalseV);
+ Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName());
+ PN->setIncomingValue(OrigI, V);
+ PN->setIncomingValue(ThenI, V);
}
++NumSpeculations;
@@ -3382,7 +3413,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
Constant *DefaultValue,
- const DataLayout *TD) {
+ const DataLayout *TD)
+ : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 83c74e7..8ad566c 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -50,6 +50,10 @@ public:
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
=0;
+ /// ignoreCallingConv - Returns false if this transformation could possibly
+ /// change the calling convention.
+ virtual bool ignoreCallingConv() { return false; }
+
Value *optimizeCall(CallInst *CI, const DataLayout *TD,
const TargetLibraryInfo *TLI,
const LibCallSimplifier *LCS, IRBuilder<> &B) {
@@ -61,7 +65,7 @@ public:
Context = &CI->getCalledFunction()->getContext();
// We never change the calling convention.
- if (CI->getCallingConv() != llvm::CallingConv::C)
+ if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
return NULL;
return callOptimizer(CI->getCalledFunction(), CI, B);
@@ -724,6 +728,7 @@ struct StrNCpyOpt : public LibCallOptimization {
};
struct StrLenOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
@@ -1260,6 +1265,7 @@ struct FFSOpt : public LibCallOptimization {
};
struct AbsOpt : public LibCallOptimization {
+ virtual bool ignoreCallingConv() { return true; }
virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
@@ -1883,6 +1889,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+ if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0;
return Impl->optimizeCall(CI);
}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a5e1643..b5941bd 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -63,14 +63,29 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
+ if (OP == 0) continue;
+ Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_OP is null and we can ignore missing
+ // entries.
+ if (Mapped_OP == OP ||
+ (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+ continue;
// Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
+ if (Op == 0)
+ Elts.push_back(0);
+ else {
+ Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper);
+ // Use identity map if Mapped_Op is null and we can ignore missing
+ // entries.
+ if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+ Mapped_Op = Op;
+ Elts.push_back(Mapped_Op);
+ }
}
MDNode *NewMD = MDNode::get(V->getContext(), Elts);
Dummy->replaceAllUsesWith(NewMD);
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index d72a4a1..7636541 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -48,7 +48,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
-#include <map>
using namespace llvm;
static cl::opt<bool>
@@ -89,6 +88,10 @@ MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
cl::desc("The maximum number of pairable instructions per group"));
static cl::opt<unsigned>
+MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden,
+ cl::desc("The maximum number of candidate instruction pairs per group"));
+
+static cl::opt<unsigned>
MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
" a full cycle check"));
@@ -207,11 +210,6 @@ namespace {
typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
typedef std::pair<VPPair, unsigned> VPPairWithType;
- typedef std::pair<std::multimap<Value *, Value *>::iterator,
- std::multimap<Value *, Value *>::iterator> VPIteratorPair;
- typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
- std::multimap<ValuePair, ValuePair>::iterator>
- VPPIteratorPair;
AliasAnalysis *AA;
DominatorTree *DT;
@@ -225,7 +223,7 @@ namespace {
bool getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len);
@@ -239,33 +237,36 @@ namespace {
PairConnectionSplat
};
- void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes);
+ void computeConnectedPairs(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes);
void buildDepMap(BasicBlock &BB,
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &PairableInstUsers);
-
- void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs);
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
void fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *>& ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps);
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps);
bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
@@ -277,56 +278,63 @@ namespace {
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers = true,
- std::multimap<Value *, Value *> *LoadMoveSet = 0);
+ DenseSet<ValuePair> *LoadMoveSetPairs = 0);
- void computePairsConnectedTo(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P);
+ void computePairsConnectedTo(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P);
bool pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> >
+ *PairableInstUserMap = 0,
+ DenseSet<VPPair> *PairableInstUserPairSet = 0);
bool pairWillFormCycle(ValuePair P,
- std::multimap<ValuePair, ValuePair> &PairableInstUsers,
- DenseSet<ValuePair> &CurrentPairs);
-
- void pruneTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree,
- DenseSet<ValuePair> &PrunedTree, ValuePair J,
- bool UseCycleCheck);
-
- void buildInitialTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree, ValuePair J);
-
- void findBestTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- int &BestEffSize, VPIteratorPair ChoiceRange,
- bool UseCycleCheck);
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J);
+
+ void findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck);
Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
Instruction *J, unsigned o);
@@ -358,20 +366,22 @@ namespace {
void collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I);
void collectLoadMoveSet(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet);
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs);
bool canMoveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I, Instruction *J);
void moveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *&InsertionPt,
Instruction *I, Instruction *J);
@@ -463,18 +473,18 @@ namespace {
static inline void getInstructionTypes(Instruction *I,
Type *&T1, Type *&T2) {
- if (isa<StoreInst>(I)) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// For stores, it is the value type, not the pointer type that matters
// because the value is what will come from a vector register.
- Value *IVal = cast<StoreInst>(I)->getValueOperand();
+ Value *IVal = SI->getValueOperand();
T1 = IVal->getType();
} else {
T1 = I->getType();
}
- if (I->isCast())
- T2 = cast<CastInst>(I)->getSrcTy();
+ if (CastInst *CI = dyn_cast<CastInst>(I))
+ T2 = CI->getSrcTy();
else
T2 = T1;
@@ -500,7 +510,7 @@ namespace {
// InsertElement and ExtractElement have a depth factor of zero. This is
// for two reasons: First, they cannot be usefully fused. Second, because
// the pass generates a lot of these, they can confuse the simple metric
- // used to compare the trees in the next iteration. Thus, giving them a
+ // used to compare the dags in the next iteration. Thus, giving them a
// weight of zero allows the pass to essentially ignore them in
// subsequent iterations when looking for vectorization opportunities
// while still tracking dependency chains that flow through those
@@ -661,19 +671,6 @@ namespace {
}
}
- // Returns true if J is the second element in some pair referenced by
- // some multimap pair iterator pair.
- template <typename V>
- bool isSecondInIteratorPair(V J, std::pair<
- typename std::multimap<V, V>::iterator,
- typename std::multimap<V, V>::iterator> PairRange) {
- for (typename std::multimap<V, V>::iterator K = PairRange.first;
- K != PairRange.second; ++K)
- if (K->second == J) return true;
-
- return false;
- }
-
bool isPureIEChain(InsertElementInst *IE) {
InsertElementInst *IENext = IE;
do {
@@ -698,11 +695,12 @@ namespace {
DenseMap<Value *, Value *> AllChosenPairs;
DenseSet<ValuePair> AllFixedOrderPairs;
DenseMap<VPPair, unsigned> AllPairConnectionTypes;
- std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps;
+ DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs,
+ AllConnectedPairDeps;
do {
std::vector<Value *> PairableInsts;
- std::multimap<Value *, Value *> CandidatePairs;
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs;
DenseSet<ValuePair> FixedOrderPairs;
DenseMap<ValuePair, int> CandidatePairCostSavings;
ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
@@ -711,6 +709,14 @@ namespace {
PairableInsts, NonPow2Len);
if (PairableInsts.empty()) continue;
+ // Build the candidate pair set for faster lookups.
+ DenseSet<ValuePair> CandidatePairsSet;
+ for (DenseMap<Value *, std::vector<Value *> >::iterator I =
+ CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
+ for (std::vector<Value *>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ CandidatePairsSet.insert(ValuePair(I->first, *J));
+
// Now we have a map of all of the pairable instructions and we need to
// select the best possible pairing. A good pairing is one such that the
// users of the pair are also paired. This defines a (directed) forest
@@ -720,30 +726,33 @@ namespace {
// Note that it only matters that both members of the second pair use some
// element of the first pair (to allow for splatting).
- std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps;
+ DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs,
+ ConnectedPairDeps;
DenseMap<VPPair, unsigned> PairConnectionTypes;
- computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs,
- PairConnectionTypes);
+ computeConnectedPairs(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs, PairConnectionTypes);
if (ConnectedPairs.empty()) continue;
- for (std::multimap<ValuePair, ValuePair>::iterator
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I) {
- ConnectedPairDeps.insert(VPPair(I->second, I->first));
- }
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ ConnectedPairDeps[*J].push_back(I->first);
// Build the pairable-instruction dependency map
DenseSet<ValuePair> PairableInstUsers;
buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
// There is now a graph of the connected pairs. For each variable, pick
- // the pairing with the largest tree meeting the depth requirement on at
- // least one branch. Then select all pairings that are part of that tree
+ // the pairing with the largest dag meeting the depth requirement on at
+ // least one branch. Then select all pairings that are part of that dag
// and remove them from the list of available pairings and pairable
// variables.
DenseMap<Value *, Value *> ChosenPairs;
- choosePairs(CandidatePairs, CandidatePairCostSavings,
+ choosePairs(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
PairableInsts, FixedOrderPairs, PairConnectionTypes,
ConnectedPairs, ConnectedPairDeps,
PairableInstUsers, ChosenPairs);
@@ -777,14 +786,15 @@ namespace {
}
}
- for (std::multimap<ValuePair, ValuePair>::iterator
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator
I = ConnectedPairs.begin(), IE = ConnectedPairs.end();
- I != IE; ++I) {
- if (AllPairConnectionTypes.count(*I)) {
- AllConnectedPairs.insert(*I);
- AllConnectedPairDeps.insert(VPPair(I->second, I->first));
- }
- }
+ I != IE; ++I)
+ for (std::vector<ValuePair>::iterator J = I->second.begin(),
+ JE = I->second.end(); J != JE; ++J)
+ if (AllPairConnectionTypes.count(VPPair(I->first, *J))) {
+ AllConnectedPairs[I->first].push_back(*J);
+ AllConnectedPairDeps[*J].push_back(I->first);
+ }
} while (ShouldContinue);
if (AllChosenPairs.empty()) return false;
@@ -910,7 +920,7 @@ namespace {
// This function returns true if the two provided instructions are compatible
// (meaning that they can be fused into a vector instruction). This assumes
// that I has already been determined to be vectorizable and that J is not
- // in the use tree of I.
+ // in the use dag of I.
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
bool IsSimpleLoadStore, bool NonPow2Len,
int &CostSavings, int &FixedOrder) {
@@ -972,6 +982,11 @@ namespace {
unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType,
BottomAlignment,
IAddressSpace);
+
+ ICost += TTI->getAddressComputationCost(aTypeI);
+ JCost += TTI->getAddressComputationCost(aTypeJ);
+ VCost += TTI->getAddressComputationCost(VType);
+
if (VCost > ICost + JCost)
return false;
@@ -994,6 +1009,12 @@ namespace {
unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
Type *VT1 = getVecTypeForPair(IT1, JT1),
*VT2 = getVecTypeForPair(IT2, JT2);
+
+ // Note that this procedure is incorrect for insert and extract element
+ // instructions (because combining these often results in a shuffle),
+ // but this cost is ignored (because insert and extract element
+ // instructions are assigned a zero depth factor and are not really
+ // fused in general).
unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2);
if (VCost > ICost + JCost)
@@ -1090,7 +1111,7 @@ namespace {
// to contain any memory locations to which J writes. The function returns
// true if J uses I. By default, alias analysis is used to determine
// whether J reads from memory that overlaps with a location in WriteSet.
- // If LoadMoveSet is not null, then it is a previously-computed multimap
+ // If LoadMoveSet is not null, then it is a previously-computed map
// where the key is the memory-based user instruction and the value is
// the instruction to be compared with I. So, if LoadMoveSet is provided,
// then the alias analysis is not used. This is necessary because this
@@ -1100,7 +1121,7 @@ namespace {
bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers,
- std::multimap<Value *, Value *> *LoadMoveSet) {
+ DenseSet<ValuePair> *LoadMoveSetPairs) {
bool UsesI = false;
// This instruction may already be marked as a user due, for example, to
@@ -1118,9 +1139,8 @@ namespace {
}
}
if (!UsesI && J->mayReadFromMemory()) {
- if (LoadMoveSet) {
- VPIteratorPair JPairRange = LoadMoveSet->equal_range(J);
- UsesI = isSecondInIteratorPair<Value*>(I, JPairRange);
+ if (LoadMoveSetPairs) {
+ UsesI = LoadMoveSetPairs->count(ValuePair(J, I));
} else {
for (AliasSetTracker::iterator W = WriteSet.begin(),
WE = WriteSet.end(); W != WE; ++W) {
@@ -1144,10 +1164,11 @@ namespace {
// basic block and collects all candidate pairs for vectorization.
bool BBVectorize::getCandidatePairs(BasicBlock &BB,
BasicBlock::iterator &Start,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
DenseSet<ValuePair> &FixedOrderPairs,
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len) {
+ size_t TotalPairs = 0;
BasicBlock::iterator E = BB.end();
if (Start == E) return false;
@@ -1193,7 +1214,8 @@ namespace {
PairableInsts.push_back(I);
}
- CandidatePairs.insert(ValuePair(I, J));
+ CandidatePairs[I].push_back(J);
+ ++TotalPairs;
if (TTI)
CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J),
CostSavings));
@@ -1217,7 +1239,8 @@ namespace {
// If we have already found too many pairs, break here and this function
// will be called again starting after the last instruction selected
// during this invocation.
- if (PairableInsts.size() >= Config.MaxInsts) {
+ if (PairableInsts.size() >= Config.MaxInsts ||
+ TotalPairs >= Config.MaxPairs) {
ShouldContinue = true;
break;
}
@@ -1237,11 +1260,12 @@ namespace {
// it looks for pairs such that both members have an input which is an
// output of PI or PJ.
void BBVectorize::computePairsConnectedTo(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- ValuePair P) {
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ ValuePair P) {
StoreInst *SI, *SJ;
// For each possible pairing for this variable, look at the uses of
@@ -1259,8 +1283,6 @@ namespace {
continue;
}
- VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
-
// For each use of the first variable, look for uses of the second
// variable...
for (Value::use_iterator J = P.second->use_begin(),
@@ -1269,19 +1291,17 @@ namespace {
P.second == SJ->getPointerOperand())
continue;
- VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
-
// Look for <I, J>:
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect));
}
// Look for <J, I>:
- if (isSecondInIteratorPair<Value*>(*I, JPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*J, *I))) {
VPPair VP(P, ValuePair(*J, *I));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap));
}
}
@@ -1294,9 +1314,9 @@ namespace {
P.first == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
}
@@ -1313,16 +1333,14 @@ namespace {
P.second == SI->getPointerOperand())
continue;
- VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
-
for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
if ((SJ = dyn_cast<StoreInst>(*J)) &&
P.second == SJ->getPointerOperand())
continue;
- if (isSecondInIteratorPair<Value*>(*J, IPairRange)) {
+ if (CandidatePairsSet.count(ValuePair(*I, *J))) {
VPPair VP(P, ValuePair(*I, *J));
- ConnectedPairs.insert(VP);
+ ConnectedPairs[VP.first].push_back(VP.second);
PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat));
}
}
@@ -1333,55 +1351,73 @@ namespace {
// connected if some output of the first pair forms an input to both members
// of the second pair.
void BBVectorize::computeConnectedPairs(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes) {
-
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PE = PairableInsts.end(); PI != PE; ++PI) {
- VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
+ DenseMap<Value *, std::vector<Value *> >::iterator PP =
+ CandidatePairs.find(*PI);
+ if (PP == CandidatePairs.end())
+ continue;
- for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
- P != choiceRange.second; ++P)
- computePairsConnectedTo(CandidatePairs, PairableInsts,
- ConnectedPairs, PairConnectionTypes, *P);
+ for (std::vector<Value *>::iterator P = PP->second.begin(),
+ E = PP->second.end(); P != E; ++P)
+ computePairsConnectedTo(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairConnectionTypes, ValuePair(*PI, *P));
}
- DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
+ DEBUG(size_t TotalPairs = 0;
+ for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I =
+ ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I)
+ TotalPairs += I->second.size();
+ dbgs() << "BBV: found " << TotalPairs
<< " pair connections.\n");
}
// This function builds a set of use tuples such that <A, B> is in the set
- // if B is in the use tree of A. If B is in the use tree of A, then B
+ // if B is in the use dag of A. If B is in the use dag of A, then B
// depends on the output of A.
void BBVectorize::buildDepMap(
BasicBlock &BB,
- std::multimap<Value *, Value *> &CandidatePairs,
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
std::vector<Value *> &PairableInsts,
DenseSet<ValuePair> &PairableInstUsers) {
DenseSet<Value *> IsInPair;
- for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
- E = CandidatePairs.end(); C != E; ++C) {
+ for (DenseMap<Value *, std::vector<Value *> >::iterator C =
+ CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) {
IsInPair.insert(C->first);
- IsInPair.insert(C->second);
+ IsInPair.insert(C->second.begin(), C->second.end());
}
- // Iterate through the basic block, recording all Users of each
+ // Iterate through the basic block, recording all users of each
// pairable instruction.
- BasicBlock::iterator E = BB.end();
+ BasicBlock::iterator E = BB.end(), EL =
+ BasicBlock::iterator(cast<Instruction>(PairableInsts.back()));
for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
if (IsInPair.find(I) == IsInPair.end()) continue;
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
- for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
+ for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
+ if (J == EL)
+ break;
+ }
+
for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
- U != E; ++U)
+ U != E; ++U) {
+ if (IsInPair.find(*U) == IsInPair.end()) continue;
PairableInstUsers.insert(ValuePair(I, *U));
+ }
+
+ if (I == EL)
+ break;
}
}
@@ -1389,8 +1425,9 @@ namespace {
// input of pair Q is an output of pair P. If this is the case, then these
// two pairs cannot be simultaneously fused.
bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> *PairableInstUserMap) {
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap,
+ DenseSet<VPPair> *PairableInstUserPairSet) {
// Two pairs are in conflict if they are mutual Users of eachother.
bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
@@ -1403,17 +1440,14 @@ namespace {
if (PairableInstUserMap) {
// FIXME: The expensive part of the cycle check is not so much the cycle
// check itself but this edge insertion procedure. This needs some
- // profiling and probably a different data structure (same is true of
- // most uses of std::multimap).
+ // profiling and probably a different data structure.
if (PUsesQ) {
- VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q);
- if (!isSecondInIteratorPair(P, QPairRange))
- PairableInstUserMap->insert(VPPair(Q, P));
+ if (PairableInstUserPairSet->insert(VPPair(Q, P)).second)
+ (*PairableInstUserMap)[Q].push_back(P);
}
if (QUsesP) {
- VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P);
- if (!isSecondInIteratorPair(Q, PPairRange))
- PairableInstUserMap->insert(VPPair(P, Q));
+ if (PairableInstUserPairSet->insert(VPPair(P, Q)).second)
+ (*PairableInstUserMap)[P].push_back(Q);
}
}
@@ -1423,8 +1457,8 @@ namespace {
// This function walks the use graph of current pairs to see if, starting
// from P, the walk returns to P.
bool BBVectorize::pairWillFormCycle(ValuePair P,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseSet<ValuePair> &CurrentPairs) {
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<ValuePair> &CurrentPairs) {
DEBUG(if (DebugCycleCheck)
dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
<< *P.second << "\n");
@@ -1441,36 +1475,41 @@ namespace {
DEBUG(if (DebugCycleCheck)
dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
<< *QTop.second << "\n");
- VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
- for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
- C != QPairRange.second; ++C) {
- if (C->second == P) {
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ PairableInstUserMap.find(QTop);
+ if (QQ == PairableInstUserMap.end())
+ continue;
+
+ for (std::vector<ValuePair>::iterator C = QQ->second.begin(),
+ CE = QQ->second.end(); C != CE; ++C) {
+ if (*C == P) {
DEBUG(dbgs()
<< "BBV: rejected to prevent non-trivial cycle formation: "
- << *C->first.first << " <-> " << *C->first.second << "\n");
+ << QTop.first << " <-> " << C->second << "\n");
return true;
}
- if (CurrentPairs.count(C->second) && !Visited.count(C->second))
- Q.push_back(C->second);
+ if (CurrentPairs.count(*C) && !Visited.count(*C))
+ Q.push_back(*C);
}
} while (!Q.empty());
return false;
}
- // This function builds the initial tree of connected pairs with the
+ // This function builds the initial dag of connected pairs with the
// pair J at the root.
- void BBVectorize::buildInitialTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
- // Each of these pairs is viewed as the root node of a Tree. The Tree
+ void BBVectorize::buildInitialDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG, ValuePair J) {
+ // Each of these pairs is viewed as the root node of a DAG. The DAG
// is then walked (depth-first). As this happens, we keep track of
- // the pairs that compose the Tree and the maximum depth of the Tree.
+ // the pairs that compose the DAG and the maximum depth of the DAG.
SmallVector<ValuePairWithDepth, 32> Q;
// General depth-first post-order traversal:
Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
@@ -1480,69 +1519,65 @@ namespace {
// Push each child onto the queue:
bool MoreChildren = false;
size_t MaxChildDepth = QTop.second;
- VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
- for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first;
- k != qtRange.second; ++k) {
- // Make sure that this child pair is still a candidate:
- bool IsStillCand = false;
- VPIteratorPair checkRange =
- CandidatePairs.equal_range(k->second.first);
- for (std::multimap<Value *, Value *>::iterator m = checkRange.first;
- m != checkRange.second; ++m) {
- if (m->second == k->second.second) {
- IsStillCand = true;
- break;
- }
- }
-
- if (IsStillCand) {
- DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
- if (C == Tree.end()) {
- size_t d = getDepthFactor(k->second.first);
- Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
- MoreChildren = true;
- } else {
- MaxChildDepth = std::max(MaxChildDepth, C->second);
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator k = QQ->second.begin(),
+ ke = QQ->second.end(); k != ke; ++k) {
+ // Make sure that this child pair is still a candidate:
+ if (CandidatePairsSet.count(*k)) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k);
+ if (C == DAG.end()) {
+ size_t d = getDepthFactor(k->first);
+ Q.push_back(ValuePairWithDepth(*k, QTop.second+d));
+ MoreChildren = true;
+ } else {
+ MaxChildDepth = std::max(MaxChildDepth, C->second);
+ }
}
}
- }
if (!MoreChildren) {
- // Record the current pair as part of the Tree:
- Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
+ // Record the current pair as part of the DAG:
+ DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
Q.pop_back();
}
} while (!Q.empty());
}
- // Given some initial tree, prune it by removing conflicting pairs (pairs
+ // Given some initial dag, prune it by removing conflicting pairs (pairs
// that cannot be simultaneously chosen for vectorization).
- void BBVectorize::pruneTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- std::vector<Value *> &PairableInsts,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseMap<ValuePair, size_t> &Tree,
- DenseSet<ValuePair> &PrunedTree, ValuePair J,
- bool UseCycleCheck) {
+ void BBVectorize::pruneDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &DAG,
+ DenseSet<ValuePair> &PrunedDAG, ValuePair J,
+ bool UseCycleCheck) {
SmallVector<ValuePairWithDepth, 32> Q;
// General depth-first post-order traversal:
Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
do {
ValuePairWithDepth QTop = Q.pop_back_val();
- PrunedTree.insert(QTop.first);
+ PrunedDAG.insert(QTop.first);
// Visit each child, pruning as necessary...
SmallVector<ValuePairWithDepth, 8> BestChildren;
- VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
- for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
- K != QTopRange.second; ++K) {
- DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
- if (C == Tree.end()) continue;
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ =
+ ConnectedPairs.find(QTop.first);
+ if (QQ == ConnectedPairs.end())
+ continue;
- // This child is in the Tree, now we need to make sure it is the
+ for (std::vector<ValuePair>::iterator K = QQ->second.begin(),
+ KE = QQ->second.end(); K != KE; ++K) {
+ DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K);
+ if (C == DAG.end()) continue;
+
+ // This child is in the DAG, now we need to make sure it is the
// best of any conflicting children. There could be multiple
// conflicting children, so first, determine if we're keeping
// this child, then delete conflicting children as necessary.
@@ -1556,7 +1591,7 @@ namespace {
// fusing (a,b) we have y .. a/b .. x where y is an input
// to a/b and x is an output to a/b: x and y can no longer
// be legally fused. To prevent this condition, we must
- // make sure that a child pair added to the Tree is not
+ // make sure that a child pair added to the DAG is not
// both an input and output of an already-selected pair.
// Pairing-induced dependencies can also form from more complicated
@@ -1575,7 +1610,8 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
if (C2->second >= C->second) {
CanAdd = false;
break;
@@ -1587,15 +1623,16 @@ namespace {
if (!CanAdd) continue;
// Even worse, this child could conflict with another node already
- // selected for the Tree. If that is the case, ignore this child.
- for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
- E2 = PrunedTree.end(); T != E2; ++T) {
+ // selected for the DAG. If that is the case, ignore this child.
+ for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(),
+ E2 = PrunedDAG.end(); T != E2; ++T) {
if (T->first == C->first.first ||
T->first == C->first.second ||
T->second == C->first.first ||
T->second == C->first.second ||
pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1612,7 +1649,8 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1627,7 +1665,8 @@ namespace {
ChosenPairs.begin(), E2 = ChosenPairs.end();
C2 != E2; ++C2) {
if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
CanAdd = false;
break;
}
@@ -1639,7 +1678,7 @@ namespace {
// To check for non-trivial cycles formed by the addition of the
// current pair we've formed a list of all relevant pairs, now use a
// graph walk to check for a cycle. We start from the current pair and
- // walk the use tree to see if we again reach the current pair. If we
+ // walk the use dag to see if we again reach the current pair. If we
// do, then the current pair is rejected.
// FIXME: It may be more efficient to use a topological-ordering
@@ -1676,34 +1715,40 @@ namespace {
} while (!Q.empty());
}
- // This function finds the best tree of mututally-compatible connected
+ // This function finds the best dag of mutually-compatible connected
// pairs, given the choice of root pairs as an iterator range.
- void BBVectorize::findBestTreeFor(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
- int &BestEffSize, VPIteratorPair ChoiceRange,
- bool UseCycleCheck) {
- for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
- J != ChoiceRange.second; ++J) {
+ void BBVectorize::findBestDAGFor(
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap,
+ DenseSet<VPPair> &PairableInstUserPairSet,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth,
+ int &BestEffSize, Value *II, std::vector<Value *>&JJ,
+ bool UseCycleCheck) {
+ for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end();
+ J != JE; ++J) {
+ ValuePair IJ(II, *J);
+ if (!CandidatePairsSet.count(IJ))
+ continue;
// Before going any further, make sure that this pair does not
// conflict with any already-selected pairs (see comment below
- // near the Tree pruning for more details).
+ // near the DAG pruning for more details).
DenseSet<ValuePair> ChosenPairSet;
bool DoesConflict = false;
for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
E = ChosenPairs.end(); C != E; ++C) {
- if (pairsConflict(*C, *J, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0)) {
+ if (pairsConflict(*C, IJ, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0,
+ UseCycleCheck ? &PairableInstUserPairSet : 0)) {
DoesConflict = true;
break;
}
@@ -1713,40 +1758,42 @@ namespace {
if (DoesConflict) continue;
if (UseCycleCheck &&
- pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet))
+ pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet))
continue;
- DenseMap<ValuePair, size_t> Tree;
- buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, ChosenPairs, Tree, *J);
+ DenseMap<ValuePair, size_t> DAG;
+ buildInitialDAGFor(CandidatePairs, CandidatePairsSet,
+ PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs, DAG, IJ);
// Because we'll keep the child with the largest depth, the largest
- // depth is still the same in the unpruned Tree.
- size_t MaxDepth = Tree.lookup(*J);
+ // depth is still the same in the unpruned DAG.
+ size_t MaxDepth = DAG.lookup(IJ);
- DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {"
- << *J->first << " <-> " << *J->second << "} of depth " <<
- MaxDepth << " and size " << Tree.size() << "\n");
+ DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {"
+ << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ MaxDepth << " and size " << DAG.size() << "\n");
- // At this point the Tree has been constructed, but, may contain
+ // At this point the DAG has been constructed, but, may contain
// contradictory children (meaning that different children of
- // some tree node may be attempting to fuse the same instruction).
- // So now we walk the tree again, in the case of a conflict,
+ // some dag node may be attempting to fuse the same instruction).
+ // So now we walk the dag again, in the case of a conflict,
// keep only the child with the largest depth. To break a tie,
// favor the first child.
- DenseSet<ValuePair> PrunedTree;
- pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
- PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
- PrunedTree, *J, UseCycleCheck);
+ DenseSet<ValuePair> PrunedDAG;
+ pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet,
+ ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck);
int EffSize = 0;
if (TTI) {
- DenseSet<Value *> PrunedTreeInstrs;
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S) {
- PrunedTreeInstrs.insert(S->first);
- PrunedTreeInstrs.insert(S->second);
+ DenseSet<Value *> PrunedDAGInstrs;
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
+ PrunedDAGInstrs.insert(S->first);
+ PrunedDAGInstrs.insert(S->second);
}
// The set of pairs that have already contributed to the total cost.
@@ -1759,8 +1806,8 @@ namespace {
// The node weights represent the cost savings associated with
// fusing the pair of instructions.
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S) {
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S) {
if (!isa<ShuffleVectorInst>(S->first) &&
!isa<InsertElementInst>(S->first) &&
!isa<ExtractElementInst>(S->first))
@@ -1778,15 +1825,17 @@ namespace {
// The edge weights contribute in a negative sense: they represent
// the cost of shuffles.
- VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
- if (IP.first != ConnectedPairDeps.end()) {
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS =
+ ConnectedPairDeps.find(*S);
+ if (SS != ConnectedPairDeps.end()) {
unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- if (!PrunedTree.count(Q->second))
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
continue;
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
if (R->second == PairConnectionDirect)
@@ -1802,24 +1851,35 @@ namespace {
((NumDepsSwap > NumDepsDirect) ||
FixedOrderPairs.count(ValuePair(S->second, S->first)));
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- if (!PrunedTree.count(Q->second))
+ for (std::vector<ValuePair>::iterator T = SS->second.begin(),
+ TE = SS->second.end(); T != TE; ++T) {
+ VPPair Q(*S, *T);
+ if (!PrunedDAG.count(Q.second))
continue;
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
- Type *Ty1 = Q->second.first->getType(),
- *Ty2 = Q->second.second->getType();
+ Type *Ty1 = Q.second.first->getType(),
+ *Ty2 = Q.second.second->getType();
Type *VTy = getVecTypeForPair(Ty1, Ty2);
if ((R->second == PairConnectionDirect && FlipOrder) ||
(R->second == PairConnectionSwap && !FlipOrder) ||
R->second == PairConnectionSplat) {
int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2) {
+ if (R->second == PairConnectionSplat)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Broadcast, VTy));
+ else
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
+ }
+
DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
- *Q->second.first << " <-> " << *Q->second.second <<
+ *Q.second.first << " <-> " << *Q.second.second <<
"} -> {" <<
*S->first << " <-> " << *S->second << "} = " <<
ESContrib << "\n");
@@ -1846,7 +1906,7 @@ namespace {
}
if (isa<ExtractElementInst>(*I))
continue;
- if (PrunedTreeInstrs.count(*I))
+ if (PrunedDAGInstrs.count(*I))
continue;
NeedsExtraction = true;
break;
@@ -1854,10 +1914,12 @@ namespace {
if (NeedsExtraction) {
int ESContrib;
- if (Ty1->isVectorTy())
+ if (Ty1->isVectorTy()) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
Ty1, VTy);
- else
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1));
+ } else
ESContrib = (int) TTI->getVectorInstrCost(
Instruction::ExtractElement, VTy, 0);
@@ -1876,7 +1938,7 @@ namespace {
}
if (isa<ExtractElementInst>(*I))
continue;
- if (PrunedTreeInstrs.count(*I))
+ if (PrunedDAGInstrs.count(*I))
continue;
NeedsExtraction = true;
break;
@@ -1884,10 +1946,13 @@ namespace {
if (NeedsExtraction) {
int ESContrib;
- if (Ty2->isVectorTy())
+ if (Ty2->isVectorTy()) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
Ty2, VTy);
- else
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_ExtractSubvector, VTy,
+ Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2));
+ } else
ESContrib = (int) TTI->getVectorInstrCost(
Instruction::ExtractElement, VTy, 1);
DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
@@ -1915,7 +1980,7 @@ namespace {
ValuePair VPR = ValuePair(O2, O1);
// Internal edges are not handled here.
- if (PrunedTree.count(VP) || PrunedTree.count(VPR))
+ if (PrunedDAG.count(VP) || PrunedDAG.count(VPR))
continue;
Type *Ty1 = O1->getType(),
@@ -1963,6 +2028,10 @@ namespace {
} else if (IncomingPairs.count(VPR)) {
ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
VTy, VTy);
+
+ if (VTy->getVectorNumElements() == 2)
+ ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost(
+ TargetTransformInfo::SK_Reverse, VTy));
} else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
ESContrib = (int) TTI->getVectorInstrCost(
Instruction::InsertElement, VTy, 0);
@@ -2005,27 +2074,27 @@ namespace {
if (!HasNontrivialInsts) {
DEBUG(if (DebugPairSelection) dbgs() <<
- "\tNo non-trivial instructions in tree;"
+ "\tNo non-trivial instructions in DAG;"
" override to zero effective size\n");
EffSize = 0;
}
} else {
- for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
- E = PrunedTree.end(); S != E; ++S)
+ for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(),
+ E = PrunedDAG.end(); S != E; ++S)
EffSize += (int) getDepthFactor(S->first);
}
DEBUG(if (DebugPairSelection)
- dbgs() << "BBV: found pruned Tree for pair {"
- << *J->first << " <-> " << *J->second << "} of depth " <<
- MaxDepth << " and size " << PrunedTree.size() <<
+ dbgs() << "BBV: found pruned DAG for pair {"
+ << IJ.first << " <-> " << IJ.second << "} of depth " <<
+ MaxDepth << " and size " << PrunedDAG.size() <<
" (effective size: " << EffSize << ")\n");
if (((TTI && !UseChainDepthWithTI) ||
MaxDepth >= Config.ReqChainDepth) &&
EffSize > 0 && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth;
BestEffSize = EffSize;
- BestTree = PrunedTree;
+ BestDAG = PrunedDAG;
}
}
}
@@ -2033,66 +2102,98 @@ namespace {
// Given the list of candidate pairs, this function selects those
// that will be fused into vector instructions.
void BBVectorize::choosePairs(
- std::multimap<Value *, Value *> &CandidatePairs,
- DenseMap<ValuePair, int> &CandidatePairCostSavings,
- std::vector<Value *> &PairableInsts,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps,
- DenseSet<ValuePair> &PairableInstUsers,
- DenseMap<Value *, Value *>& ChosenPairs) {
+ DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
+ DenseSet<ValuePair> &CandidatePairsSet,
+ DenseMap<ValuePair, int> &CandidatePairCostSavings,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs) {
bool UseCycleCheck =
- CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
- std::multimap<ValuePair, ValuePair> PairableInstUserMap;
+ CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck;
+
+ DenseMap<Value *, std::vector<Value *> > CandidatePairs2;
+ for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(),
+ E = CandidatePairsSet.end(); I != E; ++I) {
+ std::vector<Value *> &JJ = CandidatePairs2[I->second];
+ if (JJ.empty()) JJ.reserve(32);
+ JJ.push_back(I->first);
+ }
+
+ DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap;
+ DenseSet<VPPair> PairableInstUserPairSet;
for (std::vector<Value *>::iterator I = PairableInsts.begin(),
E = PairableInsts.end(); I != E; ++I) {
// The number of possible pairings for this variable:
- size_t NumChoices = CandidatePairs.count(*I);
+ size_t NumChoices = CandidatePairs.lookup(*I).size();
if (!NumChoices) continue;
- VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
+ std::vector<Value *> &JJ = CandidatePairs[*I];
- // The best pair to choose and its tree:
+ // The best pair to choose and its dag:
size_t BestMaxDepth = 0;
int BestEffSize = 0;
- DenseSet<ValuePair> BestTree;
- findBestTreeFor(CandidatePairs, CandidatePairCostSavings,
+ DenseSet<ValuePair> BestDAG;
+ findBestDAGFor(CandidatePairs, CandidatePairsSet,
+ CandidatePairCostSavings,
PairableInsts, FixedOrderPairs, PairConnectionTypes,
ConnectedPairs, ConnectedPairDeps,
- PairableInstUsers, PairableInstUserMap, ChosenPairs,
- BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
+ PairableInstUsers, PairableInstUserMap,
+ PairableInstUserPairSet, ChosenPairs,
+ BestDAG, BestMaxDepth, BestEffSize, *I, JJ,
UseCycleCheck);
- // A tree has been chosen (or not) at this point. If no tree was
+ if (BestDAG.empty())
+ continue;
+
+ // A dag has been chosen (or not) at this point. If no dag was
// chosen, then this instruction, I, cannot be paired (and is no longer
// considered).
- DEBUG(if (BestTree.size() > 0)
- dbgs() << "BBV: selected pairs in the best tree for: "
- << *cast<Instruction>(*I) << "\n");
+ DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: "
+ << *cast<Instruction>(*I) << "\n");
- for (DenseSet<ValuePair>::iterator S = BestTree.begin(),
- SE2 = BestTree.end(); S != SE2; ++S) {
- // Insert the members of this tree into the list of chosen pairs.
+ for (DenseSet<ValuePair>::iterator S = BestDAG.begin(),
+ SE2 = BestDAG.end(); S != SE2; ++S) {
+ // Insert the members of this dag into the list of chosen pairs.
ChosenPairs.insert(ValuePair(S->first, S->second));
DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
*S->second << "\n");
- // Remove all candidate pairs that have values in the chosen tree.
- for (std::multimap<Value *, Value *>::iterator K =
- CandidatePairs.begin(); K != CandidatePairs.end();) {
- if (K->first == S->first || K->second == S->first ||
- K->second == S->second || K->first == S->second) {
- // Don't remove the actual pair chosen so that it can be used
- // in subsequent tree selections.
- if (!(K->first == S->first && K->second == S->second))
- CandidatePairs.erase(K++);
- else
- ++K;
- } else {
- ++K;
- }
+ // Remove all candidate pairs that have values in the chosen dag.
+ std::vector<Value *> &KK = CandidatePairs[S->first];
+ for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end();
+ K != KE; ++K) {
+ if (*K == S->second)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(S->first, *K));
+ }
+
+ std::vector<Value *> &LL = CandidatePairs2[S->second];
+ for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end();
+ L != LE; ++L) {
+ if (*L == S->first)
+ continue;
+
+ CandidatePairsSet.erase(ValuePair(*L, S->second));
+ }
+
+ std::vector<Value *> &MM = CandidatePairs[S->second];
+ for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end();
+ M != ME; ++M) {
+ assert(*M != S->first && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(S->second, *M));
+ }
+
+ std::vector<Value *> &NN = CandidatePairs2[S->first];
+ for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end();
+ N != NE; ++N) {
+ assert(*N != S->second && "Flipped pair in candidate list?");
+ CandidatePairsSet.erase(ValuePair(*N, S->first));
}
}
}
@@ -2696,7 +2797,7 @@ namespace {
// Move all uses of the function I (including pairing-induced uses) after J.
bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
@@ -2704,18 +2805,18 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
for (; cast<Instruction>(L) != J; ++L)
- (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet);
+ (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
assert(cast<Instruction>(L) == J &&
"Tracking has not proceeded far enough to check for dependencies");
// If J is now in the use set of I, then trackUsesOfI will return true
// and we have a dependency cycle (and the fusing operation must abort).
- return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet);
+ return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs);
}
// Move all uses of the function I (including pairing-induced uses) after J.
void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *&InsertionPt,
Instruction *I, Instruction *J) {
// Skip to the first instruction past I.
@@ -2724,7 +2825,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
for (; cast<Instruction>(L) != J;) {
- if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) {
+ if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
Instruction *InstToMove = L; ++L;
@@ -2744,7 +2845,8 @@ namespace {
// to be moved after J (the second instruction) when the pair is fused.
void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet,
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs,
Instruction *I) {
// Skip to the first instruction past I.
BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
@@ -2757,8 +2859,10 @@ namespace {
// could be before I if this is an inverted input.
for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
if (trackUsesOfI(Users, WriteSet, I, L)) {
- if (L->mayReadFromMemory())
- LoadMoveSet.insert(ValuePair(L, I));
+ if (L->mayReadFromMemory()) {
+ LoadMoveSet[L].push_back(I);
+ LoadMoveSetPairs.insert(ValuePair(L, I));
+ }
}
}
}
@@ -2767,20 +2871,22 @@ namespace {
// are chosen for vectorization, we can end up in a situation where the
// aliasing analysis starts returning different query results as the
// process of fusing instruction pairs continues. Because the algorithm
- // relies on finding the same use trees here as were found earlier, we'll
+ // relies on finding the same use dags here as were found earlier, we'll
// need to precompute the necessary aliasing information here and then
// manually update it during the fusion process.
void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
std::vector<Value *> &PairableInsts,
DenseMap<Value *, Value *> &ChosenPairs,
- std::multimap<Value *, Value *> &LoadMoveSet) {
+ DenseMap<Value *, std::vector<Value *> > &LoadMoveSet,
+ DenseSet<ValuePair> &LoadMoveSetPairs) {
for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
PIE = PairableInsts.end(); PI != PIE; ++PI) {
DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
if (P == ChosenPairs.end()) continue;
Instruction *I = cast<Instruction>(P->first);
- collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I);
+ collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet,
+ LoadMoveSetPairs, I);
}
}
@@ -2816,12 +2922,12 @@ namespace {
// because the vector instruction is inserted in the location of the pair's
// second member).
void BBVectorize::fuseChosenPairs(BasicBlock &BB,
- std::vector<Value *> &PairableInsts,
- DenseMap<Value *, Value *> &ChosenPairs,
- DenseSet<ValuePair> &FixedOrderPairs,
- DenseMap<VPPair, unsigned> &PairConnectionTypes,
- std::multimap<ValuePair, ValuePair> &ConnectedPairs,
- std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) {
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &FixedOrderPairs,
+ DenseMap<VPPair, unsigned> &PairConnectionTypes,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs,
+ DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) {
LLVMContext& Context = BB.getContext();
// During the vectorization process, the order of the pairs to be fused
@@ -2835,8 +2941,10 @@ namespace {
E = FlippedPairs.end(); P != E; ++P)
ChosenPairs.insert(*P);
- std::multimap<Value *, Value *> LoadMoveSet;
- collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+ DenseMap<Value *, std::vector<Value *> > LoadMoveSet;
+ DenseSet<ValuePair> LoadMoveSetPairs;
+ collectLoadMoveSet(BB, PairableInsts, ChosenPairs,
+ LoadMoveSet, LoadMoveSetPairs);
DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
@@ -2868,7 +2976,7 @@ namespace {
ChosenPairs.erase(FP);
ChosenPairs.erase(P);
- if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) {
+ if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) {
DEBUG(dbgs() << "BBV: fusion of: " << *I <<
" <-> " << *J <<
" aborted because of non-trivial dependency cycle\n");
@@ -2885,18 +2993,20 @@ namespace {
// of dependencies connected via swaps, and those directly connected,
// and flip the order if the number of swaps is greater.
bool OrigOrder = true;
- VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J));
- if (IP.first == ConnectedPairDeps.end()) {
- IP = ConnectedPairDeps.equal_range(ValuePair(J, I));
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ =
+ ConnectedPairDeps.find(ValuePair(I, J));
+ if (IJ == ConnectedPairDeps.end()) {
+ IJ = ConnectedPairDeps.find(ValuePair(J, I));
OrigOrder = false;
}
- if (IP.first != ConnectedPairDeps.end()) {
+ if (IJ != ConnectedPairDeps.end()) {
unsigned NumDepsDirect = 0, NumDepsSwap = 0;
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
+ for (std::vector<ValuePair>::iterator T = IJ->second.begin(),
+ TE = IJ->second.end(); T != TE; ++T) {
+ VPPair Q(IJ->first, *T);
DenseMap<VPPair, unsigned>::iterator R =
- PairConnectionTypes.find(VPPair(Q->second, Q->first));
+ PairConnectionTypes.find(VPPair(Q.second, Q.first));
assert(R != PairConnectionTypes.end() &&
"Cannot find pair connection type");
if (R->second == PairConnectionDirect)
@@ -2922,17 +3032,20 @@ namespace {
// If the pair being fused uses the opposite order from that in the pair
// connection map, then we need to flip the types.
- VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L));
- for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
- Q != IP.second; ++Q) {
- DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q);
- assert(R != PairConnectionTypes.end() &&
- "Cannot find pair connection type");
- if (R->second == PairConnectionDirect)
- R->second = PairConnectionSwap;
- else if (R->second == PairConnectionSwap)
- R->second = PairConnectionDirect;
- }
+ DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL =
+ ConnectedPairs.find(ValuePair(H, L));
+ if (HL != ConnectedPairs.end())
+ for (std::vector<ValuePair>::iterator T = HL->second.begin(),
+ TE = HL->second.end(); T != TE; ++T) {
+ VPPair Q(HL->first, *T);
+ DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q);
+ assert(R != PairConnectionTypes.end() &&
+ "Cannot find pair connection type");
+ if (R->second == PairConnectionDirect)
+ R->second = PairConnectionSwap;
+ else if (R->second == PairConnectionSwap)
+ R->second = PairConnectionDirect;
+ }
bool LBeforeH = !FlipPairOrder;
unsigned NumOperands = I->getNumOperands();
@@ -2964,12 +3077,12 @@ namespace {
Instruction *K1 = 0, *K2 = 0;
replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
- // The use tree of the first original instruction must be moved to after
- // the location of the second instruction. The entire use tree of the
- // first instruction is disjoint from the input tree of the second
+ // The use dag of the first original instruction must be moved to after
+ // the location of the second instruction. The entire use dag of the
+ // first instruction is disjoint from the input dag of the second
// (by definition), and so commutes with it.
- moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
+ moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J);
if (!isa<StoreInst>(I)) {
L->replaceAllUsesWith(K1);
@@ -2986,17 +3099,23 @@ namespace {
// yet-to-be-fused pair. The loads in question are the keys of the map.
if (I->mayReadFromMemory()) {
std::vector<ValuePair> NewSetMembers;
- VPIteratorPair IPairRange = LoadMoveSet.equal_range(I);
- VPIteratorPair JPairRange = LoadMoveSet.equal_range(J);
- for (std::multimap<Value *, Value *>::iterator N = IPairRange.first;
- N != IPairRange.second; ++N)
- NewSetMembers.push_back(ValuePair(K, N->second));
- for (std::multimap<Value *, Value *>::iterator N = JPairRange.first;
- N != JPairRange.second; ++N)
- NewSetMembers.push_back(ValuePair(K, N->second));
+ DenseMap<Value *, std::vector<Value *> >::iterator II =
+ LoadMoveSet.find(I);
+ if (II != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = II->second.begin(),
+ NE = II->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
+ DenseMap<Value *, std::vector<Value *> >::iterator JJ =
+ LoadMoveSet.find(J);
+ if (JJ != LoadMoveSet.end())
+ for (std::vector<Value *>::iterator N = JJ->second.begin(),
+ NE = JJ->second.end(); N != NE; ++N)
+ NewSetMembers.push_back(ValuePair(K, *N));
for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
- AE = NewSetMembers.end(); A != AE; ++A)
- LoadMoveSet.insert(*A);
+ AE = NewSetMembers.end(); A != AE; ++A) {
+ LoadMoveSet[A->first].push_back(A->second);
+ LoadMoveSetPairs.insert(*A);
+ }
}
// Before removing I, set the iterator to the next instruction.
@@ -3056,6 +3175,7 @@ VectorizeConfig::VectorizeConfig() {
MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
SplatBreaksChain = ::SplatBreaksChain;
MaxInsts = ::MaxInsts;
+ MaxPairs = ::MaxPairs;
MaxIter = ::MaxIter;
Pow2LenOnly = ::Pow2LenOnly;
NoMemOpBoost = ::NoMemOpBoost;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9c82cb8..f489393 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9,10 +9,10 @@
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizes uses (will use) the codegen
+// in the codegen. However, the vectorizer uses (will use) the codegen
// interfaces to generate IR that is likely to result in an optimal binary.
//
-// The loop vectorizer combines consecutive loop iteration into a single
+// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
// by the SIMD vector width, and not by one.
//
@@ -32,7 +32,7 @@
// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
//
// Variable uniformity checks are inspired by:
-// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
//
// Other ideas/concepts are from:
// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
@@ -101,24 +101,20 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
/// We don't vectorize loops with a known constant trip count below this number.
-static const unsigned TinyTripCountVectorThreshold = 16;
+static cl::opt<unsigned>
+TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
+ cl::Hidden,
+ cl::desc("Don't vectorize loops with a constant "
+ "trip count that is smaller than this "
+ "value."));
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// We don't unroll loops that are larget than this threshold.
-static const unsigned MaxLoopSizeThreshold = 32;
-
/// When performing a runtime memory check, do not check more than this
/// number of pointers. Notice that the check is quadratic!
static const unsigned RuntimeMemoryCheckThreshold = 4;
-/// This is the highest vector width that we try to generate.
-static const unsigned MaxVectorSize = 8;
-
-/// This is the highest Unroll Factor.
-static const unsigned MaxUnrollSize = 4;
-
namespace {
// Forward declarations.
@@ -169,8 +165,8 @@ private:
/// Add code that checks at runtime if the accessed arrays overlap.
/// Returns the comparator value or NULL if no check is needed.
- Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
- Instruction *Loc);
+ Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal,
+ Instruction *Loc);
/// Create an empty loop, based on the loop ranges of the old loop.
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
@@ -196,6 +192,10 @@ private:
/// of scalars.
void scalarizeInstruction(Instruction *Instr);
+ /// Vectorize Load and Store instructions.
+ void vectorizeMemoryInstruction(Instruction *Instr,
+ LoopVectorizationLegality *Legal);
+
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
/// value. If this is the induction variable then we extend it to N, N+1, ...
@@ -228,31 +228,34 @@ private:
ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}
/// \return True if 'Key' is saved in the Value Map.
- bool has(Value *Key) { return MapStoreage.count(Key); }
+ bool has(Value *Key) const { return MapStorage.count(Key); }
/// Initializes a new entry in the map. Sets all of the vector parts to the
/// save value in 'Val'.
/// \return A reference to a vector with splat values.
VectorParts &splat(Value *Key, Value *Val) {
- MapStoreage[Key].clear();
- MapStoreage[Key].append(UF, Val);
- return MapStoreage[Key];
+ VectorParts &Entry = MapStorage[Key];
+ Entry.assign(UF, Val);
+ return Entry;
}
///\return A reference to the value that is stored at 'Key'.
VectorParts &get(Value *Key) {
- if (!has(Key))
- MapStoreage[Key].resize(UF);
- return MapStoreage[Key];
+ VectorParts &Entry = MapStorage[Key];
+ if (Entry.empty())
+ Entry.resize(UF);
+ assert(Entry.size() == UF);
+ return Entry;
}
+ private:
/// The unroll factor. Each entry in the map stores this number of vector
/// elements.
unsigned UF;
/// Map storage. We use std::map and not DenseMap because insertions to a
/// dense map invalidates its iterators.
- std::map<Value*, VectorParts> MapStoreage;
+ std::map<Value *, VectorParts> MapStorage;
};
/// The original loop.
@@ -289,8 +292,8 @@ private:
BasicBlock *LoopVectorBody;
///The scalar loop body.
BasicBlock *LoopScalarBody;
- ///The first bypass block.
- BasicBlock *LoopBypassBlock;
+ /// A list of all bypass blocks. The first block is the entry of the loop.
+ SmallVector<BasicBlock *, 4> LoopBypassBlocks;
/// The new Induction variable which was added to the new block.
PHINode *Induction;
@@ -316,8 +319,9 @@ private:
class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
- DominatorTree *DT)
- : TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {}
+ DominatorTree *DT, TargetTransformInfo* TTI,
+ AliasAnalysis* AA)
+ : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -336,7 +340,8 @@ public:
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = 1.
IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
- IK_PtrInduction ///< Pointer induction variable. Step = sizeof(elem).
+ IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
+ IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
/// This POD struct holds information about reduction variables.
@@ -400,6 +405,11 @@ public:
/// induction descriptor.
typedef MapVector<PHINode*, InductionInfo> InductionList;
+ /// Alias(Multi)Map stores the values (GEPs or underlying objects) and their
+ /// respective Store/Load instruction(s) to calculate aliasing.
+ typedef DenseMap<Value*, Instruction* > AliasMap;
+ typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
+
/// Returns true if it is legal to vectorize this loop.
/// This does not mean that it is profitable to vectorize this
/// loop, only that it is legal to do so.
@@ -473,6 +483,14 @@ private:
InductionKind isInductionVariable(PHINode *Phi);
/// Return true if can compute the address bounds of Ptr within the loop.
bool hasComputableBounds(Value *Ptr);
+ /// Return true if there is the chance of write reorder.
+ bool hasPossibleGlobalWriteReorder(Value *Object,
+ Instruction *Inst,
+ AliasMultiMap &WriteObjects,
+ unsigned MaxByteWidth);
+ /// Return the AA location for a load or a store.
+ AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
+
/// The loop that we evaluate.
Loop *TheLoop;
@@ -480,8 +498,12 @@ private:
ScalarEvolution *SE;
/// DataLayout analysis.
DataLayout *DL;
- // Dominators.
+ /// Dominators.
DominatorTree *DT;
+ /// Target Info.
+ TargetTransformInfo *TTI;
+ /// Alias Analysis.
+ AliasAnalysis *AA;
// --- vectorization state --- //
@@ -517,20 +539,34 @@ class LoopVectorizationCostModel {
public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
- const TargetTransformInfo &TTI)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {}
-
- /// \return The most profitable vectorization factor.
+ const TargetTransformInfo &TTI,
+ DataLayout *DL)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {}
+
+ /// Information about vectorization costs
+ struct VectorizationFactor {
+ unsigned Width; // Vector width with best cost
+ unsigned Cost; // Cost of the loop with that width
+ };
+ /// \return The most profitable vectorization factor and the cost of that VF.
/// This method checks every power of two up to VF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is
/// possible.
- unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF);
+ VectorizationFactor selectVectorizationFactor(bool OptForSize,
+ unsigned UserVF);
+ /// \return The size (in bits) of the widest type in the code that
+ /// needs to be vectorized. We ignore values that remain scalar such as
+ /// 64 bit loop indices.
+ unsigned getWidestType();
/// \return The most profitable unroll factor.
/// If UserUF is non-zero then this method finds the best unroll-factor
/// based on register pressure and other parameters.
- unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF);
+ /// VF and LoopCost are the selected vectorization factor and the cost of the
+ /// selected VF.
+ unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF,
+ unsigned LoopCost);
/// \brief A struct that represents some properties of the register usage
/// of a loop.
@@ -562,6 +598,10 @@ private:
/// the scalar type.
static Type* ToVectorTy(Type *Scalar, unsigned VF);
+ /// Returns whether the instruction is a load or store and will be emitted
+ /// as a vector operation.
+ bool isConsecutiveLoadOrStore(Instruction *I);
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -572,6 +612,8 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
+ /// Target data layout information.
+ DataLayout *DL;
};
/// The LoopVectorize Pass.
@@ -588,6 +630,7 @@ struct LoopVectorize : public LoopPass {
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
+ AliasAnalysis *AA;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
// We only vectorize innermost loops.
@@ -599,21 +642,22 @@ struct LoopVectorize : public LoopPass {
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTree>();
+ AA = getAnalysisIfAvailable<AliasAnalysis>();
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing.\n");
return false;
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL);
- // Check the function attribues to find out if this function should be
+ // Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader()->getParent();
Attribute::AttrKind SzAttr = Attribute::OptimizeForSize;
@@ -628,20 +672,24 @@ struct LoopVectorize : public LoopPass {
return false;
}
- unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
- unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll);
+ // Select the optimal vectorization factor.
+ LoopVectorizationCostModel::VectorizationFactor VF;
+ VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor);
+ // Select the unroll factor.
+ unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll,
+ VF.Width, VF.Cost);
- if (VF == 1) {
+ if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
return false;
}
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
F->getParent()->getModuleIdentifier()<<"\n");
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n");
- // If we decided that it is *legal* to vectorizer the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF, UF);
+ // If we decided that it is *legal* to vectorize the loop then do it.
+ InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF);
LB.vectorize(&LVL);
DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -730,6 +778,9 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
+ // Make sure that the pointer does not point to structs.
+ if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType())
+ return 0;
// If this value is a pointer induction variable we know it is consecutive.
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
@@ -737,6 +788,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK)
return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
}
GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
@@ -746,6 +799,29 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
unsigned NumOperands = Gep->getNumOperands();
Value *LastIndex = Gep->getOperand(NumOperands - 1);
+ Value *GpPtr = Gep->getPointerOperand();
+ // If this GEP value is a consecutive pointer induction variable and all of
+ // the indices are loop invariant then we know it is consecutive.
+ Phi = dyn_cast<PHINode>(GpPtr);
+ if (Phi && Inductions.count(Phi)) {
+
+ // Make sure that the pointer does not point to structs.
+ PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
+ if (GepPtrType->getElementType()->isAggregateType())
+ return 0;
+
+ // Make sure that all of the index operands are loop invariant.
+ for (unsigned i = 1; i < NumOperands; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return 0;
+
+ InductionInfo II = Inductions[Phi];
+ if (IK_PtrInduction == II.IK)
+ return 1;
+ else if (IK_ReversePtrInduction == II.IK)
+ return -1;
+ }
+
// Check that all of the gep indices are uniform except for the last.
for (unsigned i = 0; i < NumOperands - 1; ++i)
if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
@@ -784,8 +860,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- WidenMap.splat(V, B);
- return WidenMap.get(V);
+ return WidenMap.splat(V, B);
}
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
@@ -799,6 +874,111 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
+
+/// Widen a single load or store instruction.
+///
+/// Accesses whose pointer is consecutive (Legal->isConsecutivePtr returns a
+/// non-zero stride; a negative stride means the access runs backwards) are
+/// emitted as one wide memory operation per unroll part. Non-consecutive
+/// accesses and loads from a uniform pointer are handed to
+/// scalarizeInstruction instead.
+///
+/// \param Instr The scalar LoadInst or StoreInst to vectorize.
+/// \param Legal Legality info used for the stride, uniformity and induction
+///              queries.
+void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
+                                             LoopVectorizationLegality *Legal) {
+  // Attempt to issue a wide load.
+  LoadInst *LI = dyn_cast<LoadInst>(Instr);
+  StoreInst *SI = dyn_cast<StoreInst>(Instr);
+
+  assert((LI || SI) && "Invalid Load/Store instruction");
+
+  Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+  Type *DataTy = VectorType::get(ScalarDataTy, VF);
+  Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+  unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+
+  // If the pointer is loop invariant or if it is non consecutive,
+  // scalarize the load.
+  int Stride = Legal->isConsecutivePtr(Ptr);
+  bool Reverse = Stride < 0;
+  bool UniformLoad = LI && Legal->isUniform(Ptr);
+  if (Stride == 0 || UniformLoad)
+    return scalarizeInstruction(Instr);
+
+  Constant *Zero = Builder.getInt32(0);
+  VectorParts &Entry = WidenMap.get(Instr);
+
+  // Handle consecutive loads/stores.
+  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+  if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
+    Value *PtrOperand = Gep->getPointerOperand();
+    Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
+    FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
+
+    // Create the new GEP with the new induction variable.
+    GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+    Gep2->setOperand(0, FirstBasePtr);
+    Gep2->setName("gep.indvar.base");
+    Ptr = Builder.Insert(Gep2);
+  } else if (Gep) {
+    assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
+                               OrigLoop) && "Base ptr must be invariant");
+
+    // The last index does not have to be the induction. It can be
+    // consecutive and be a function of the index. For example A[I+1];
+    unsigned NumOperands = Gep->getNumOperands();
+
+    Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
+    VectorParts &GEPParts = getVectorValue(LastGepOperand);
+    Value *LastIndex = GEPParts[0];
+    LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+    // Create the new GEP with the new induction variable.
+    GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+    Gep2->setOperand(NumOperands - 1, LastIndex);
+    Gep2->setName("gep.indvar.idx");
+    Ptr = Builder.Insert(Gep2);
+  } else {
+    // Use the induction element ptr.
+    assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+    VectorParts &PtrVal = getVectorValue(Ptr);
+    Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
+  }
+
+  // Handle Stores:
+  if (SI) {
+    assert(!Legal->isUniform(SI->getPointerOperand()) &&
+           "We do not allow storing to uniform addresses");
+
+    VectorParts &StoredVal = getVectorValue(SI->getValueOperand());
+    for (unsigned Part = 0; Part < UF; ++Part) {
+      // Calculate the pointer for the specific unroll-part.
+      Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+      if (Reverse) {
+        // If we store to reverse consecutive memory locations then we need
+        // to reverse the order of elements in the stored value.
+        StoredVal[Part] = reverseVector(StoredVal[Part]);
+        // If the address is consecutive but reversed, then the
+        // wide store needs to start at the last vector element.
+        PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+        PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+      }
+
+      Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+      Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
+    }
+    // The store is fully emitted. Without this return control would fall
+    // through into the load path below and emit spurious wide loads from
+    // the store address.
+    return;
+  }
+
+  // Handle Loads:
+  for (unsigned Part = 0; Part < UF; ++Part) {
+    // Calculate the pointer for the specific unroll-part.
+    Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+
+    if (Reverse) {
+      // If the address is consecutive but reversed, then the
+      // wide load needs to start at the last vector element.
+      PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
+      PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+    }
+
+    Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo());
+    Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
+    cast<LoadInst>(LI)->setAlignment(Alignment);
+    // Reversed accesses load the elements in memory order, so flip them back
+    // to iteration order before recording the widened value.
+    Entry[Part] = Reverse ? reverseVector(LI) : LI;
+  }
+}
+
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
@@ -870,7 +1050,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
}
}
-Value*
+Instruction *
InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
Instruction *Loc) {
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
@@ -879,7 +1059,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
if (!PtrRtCheck->Need)
return NULL;
- Value *MemoryRuntimeCheck = 0;
+ Instruction *MemoryRuntimeCheck = 0;
unsigned NumPointers = PtrRtCheck->Pointers.size();
SmallVector<Value* , 2> Starts;
SmallVector<Value* , 2> Ends;
@@ -908,28 +1088,23 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
}
}
+ IRBuilder<> ChkBuilder(Loc);
+
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
- Instruction::CastOps Op = Instruction::BitCast;
- Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc);
- Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc);
- Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc);
- Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc);
-
- Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Start0, End1, "bound0", Loc);
- Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
- Start1, End0, "bound1", Loc);
- Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
- "found.conflict", Loc);
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
if (MemoryRuntimeCheck)
- MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or,
- MemoryRuntimeCheck,
- IsConflict,
- "conflict.rdx", Loc);
- else
- MemoryRuntimeCheck = IsConflict;
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+ MemoryRuntimeCheck = cast<Instruction>(IsConflict);
}
}
@@ -943,7 +1118,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- vector loop bypass.
+ [ ] <-- vector loop bypass (may consist of multiple blocks).
/ |
/ v
| [ ] <-- vector pre header.
@@ -1004,10 +1179,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
-
- // Generate the code that checks in runtime if arrays overlap.
- Value *MemoryRuntimeCheck = addRuntimeCheck(Legal,
- BypassBlock->getTerminator());
+ LoopBypassBlocks.push_back(BypassBlock);
// Split the single block loop into the two loop structure described above.
BasicBlock *VectorPH =
@@ -1019,10 +1191,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
BasicBlock *ScalarPH =
MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
- // This is the location in which we add all of the logic for bypassing
- // the new vector loop.
- Instruction *Loc = BypassBlock->getTerminator();
-
// Use this IR builder to create the loop instructions (Phi, Br, Cmp)
// inside the loop.
Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
@@ -1033,42 +1201,62 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// times the unroll factor (num of SIMD instructions).
Constant *Step = ConstantInt::get(IdxTy, VF * UF);
+ // This is the IR builder that we use to add all of the logic for bypassing
+ // the new vector loop.
+ IRBuilder<> BypassBuilder(BypassBlock->getTerminator());
+
// We may need to extend the index in case there is a type mismatch.
// We know that the count starts at zero and does not overflow.
if (Count->getType() != IdxTy) {
// The exit count can be of pointer type. Convert it to the correct
// integer type.
if (ExitCount->getType()->isPointerTy())
- Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc);
+ Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int");
else
- Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc);
+ Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast");
}
// Add the start index to the loop count to get the new end index.
- Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc);
+ Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx");
// Now we need to generate the expression for N - (N % VF), which is
// the part that the vectorized body will execute.
- Value *R = BinaryOperator::CreateURem(Count, Step, "n.mod.vf", Loc);
- Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
- Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx,
- "end.idx.rnd.down", Loc);
+ Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf");
+ Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec");
+ Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx,
+ "end.idx.rnd.down");
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
- IdxEndRoundDown,
- StartIdx,
- "cmp.zero", Loc);
-
- // If we are using memory runtime checks, include them in.
- if (MemoryRuntimeCheck)
- Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck,
- "CntOrMem", Loc);
+ Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
+ "cmp.zero");
+
+ BasicBlock *LastBypassBlock = BypassBlock;
+
+ // Generate the code that checks in runtime if arrays overlap. We put the
+ // checks into a separate block to make the more common case of few elements
+ // faster.
+ Instruction *MemRuntimeCheck = addRuntimeCheck(Legal,
+ BypassBlock->getTerminator());
+ if (MemRuntimeCheck) {
+ // Create a new block containing the memory check.
+ BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck,
+ "vector.memcheck");
+ LoopBypassBlocks.push_back(CheckBlock);
+
+ // Replace the branch into the memory check block with a conditional branch
+ // for the "few elements case".
+ Instruction *OldTerm = BypassBlock->getTerminator();
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
+ OldTerm->eraseFromParent();
+
+ Cmp = MemRuntimeCheck;
+ LastBypassBlock = CheckBlock;
+ }
- BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
- // Remove the old terminator.
- Loc->eraseFromParent();
+ LastBypassBlock->getTerminator()->eraseFromParent();
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp,
+ LastBypassBlock);
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
@@ -1108,30 +1296,45 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
Value *CRD = CountRoundDown;
if (CRDSize > IISize)
CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
- II.StartValue->getType(),
- "tr.crd", BypassBlock->getTerminator());
+ II.StartValue->getType(), "tr.crd",
+ LoopBypassBlocks.back()->getTerminator());
else if (CRDSize < IISize)
CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
II.StartValue->getType(),
- "sext.crd", BypassBlock->getTerminator());
+ "sext.crd",
+ LoopBypassBlocks.back()->getTerminator());
// Handle reverse integer induction counter:
- EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
- BypassBlock->getTerminator());
+ EndValue =
+ BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
- EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown,
- "ptr.ind.end",
- BypassBlock->getTerminator());
+ EndValue =
+ GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ break;
+ }
+ case LoopVectorizationLegality::IK_ReversePtrInduction: {
+ // The value at the end of the loop for the reverse pointer is calculated
+ // by creating a GEP with a negative index starting from the start value.
+ Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
+ Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown,
+ "rev.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
+ EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx,
+ "rev.ptr.ind.end",
+ LoopBypassBlocks.back()->getTerminator());
break;
}
}// end of case
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- ResumeVal->addIncoming(II.StartValue, BypassBlock);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
@@ -1147,7 +1350,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
assert(!ResumeIndex && "Unexpected resume value found");
ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
MiddleBlock->getTerminator());
- ResumeIndex->addIncoming(StartIdx, BypassBlock);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
}
@@ -1187,6 +1391,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Insert the new loop into the loop nest and register the new basic blocks.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase());
ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
@@ -1203,7 +1409,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
LoopExitBlock = ExitBlock;
LoopVectorBody = VecBody;
LoopScalarBody = OldBasicBlock;
- LoopBypassBlock = BypassBlock;
}
/// This function returns the identity element (or neutral element) for
@@ -1295,9 +1500,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// the cost-model.
//
//===------------------------------------------------===//
- BasicBlock &BB = *OrigLoop->getHeader();
- Constant *Zero =
- ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0);
+ Constant *Zero = Builder.getInt32(0);
// In order to support reduction variables we need to be able to vectorize
// Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
@@ -1343,7 +1546,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -1391,7 +1594,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
- NewPhi->addIncoming(StartVal, LoopBypassBlock);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
RdxParts.push_back(NewPhi);
}
@@ -1533,8 +1737,6 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
void
InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
BasicBlock *BB, PhiVector *PV) {
- Constant *Zero = Builder.getInt32(0);
-
// For each instruction in the old loop.
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
VectorParts &Entry = WidenMap.get(it);
@@ -1568,7 +1770,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// optimizations will clean it up.
VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
P->getParent());
-
+
for (unsigned part = 0; part < UF; ++part) {
VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
@@ -1600,6 +1802,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
}
case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction:
+ case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
Value *StartIdx = 0;
// If we have a single integer induction variable then use it.
@@ -1635,15 +1838,23 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
+ // Is this a reverse induction ptr or a consecutive induction ptr.
+ bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+ II.IK);
+
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
- Constant *Idx = ConstantInt::get(Induction->getType(),
- i + part * VF);
- Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx,
- "gep.idx");
+ int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+ Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Value *GlobalIdx;
+ if (!Reverse)
+ GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+ else
+ GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+
Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
@@ -1684,13 +1895,13 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
for (unsigned Part = 0; Part < UF; ++Part) {
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
- // Update the NSW, NUW and Exact flags.
- BinaryOperator *VecOp = cast<BinaryOperator>(V);
- if (isa<OverflowingBinaryOperator>(BinOp)) {
+ // Update the NSW, NUW and Exact flags. Notice: V can be an Undef.
+ BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V);
+ if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) {
VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
}
- if (isa<PossiblyExactOperator>(VecOp))
+ if (VecOp && isa<PossiblyExactOperator>(VecOp))
VecOp->setIsExact(BinOp->isExact());
Entry[Part] = V;
@@ -1740,124 +1951,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
break;
}
- case Instruction::Store: {
- // Attempt to issue a wide store.
- StoreInst *SI = dyn_cast<StoreInst>(it);
- Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
- Value *Ptr = SI->getPointerOperand();
- unsigned Alignment = SI->getAlignment();
-
- assert(!Legal->isUniform(Ptr) &&
- "We do not allow storing to uniform addresses");
-
-
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
- if (Stride == 0) {
- scalarizeInstruction(it);
+ case Instruction::Store:
+ case Instruction::Load:
+ vectorizeMemoryInstruction(it, Legal);
break;
- }
-
- // Handle consecutive stores.
-
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (Gep) {
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
-
- Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
- VectorParts &GEPParts = getVectorValue(LastGepOperand);
- Value *LastIndex = GEPParts[0];
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- } else {
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
- VectorParts &PtrVal = getVectorValue(Ptr);
- Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
- }
-
- VectorParts &StoredVal = getVectorValue(SI->getValueOperand());
- for (unsigned Part = 0; Part < UF; ++Part) {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
-
- if (Reverse) {
- // If we store to reverse consecutive memory locations then we need
- // to reverse the order of elements in the stored value.
- StoredVal[Part] = reverseVector(StoredVal[Part]);
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
- }
-
- Value *VecPtr = Builder.CreateBitCast(PartPtr, StTy->getPointerTo());
- Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment);
- }
- break;
- }
- case Instruction::Load: {
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(it);
- Type *RetTy = VectorType::get(LI->getType(), VF);
- Value *Ptr = LI->getPointerOperand();
- unsigned Alignment = LI->getAlignment();
-
- // If the pointer is loop invariant or if it is non consecutive,
- // scalarize the load.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
- if (Legal->isUniform(Ptr) || Stride == 0) {
- scalarizeInstruction(it);
- break;
- }
-
- GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
- if (Gep) {
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
- unsigned NumOperands = Gep->getNumOperands();
-
- Value *LastGepOperand = Gep->getOperand(NumOperands - 1);
- VectorParts &GEPParts = getVectorValue(LastGepOperand);
- Value *LastIndex = GEPParts[0];
- LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(NumOperands - 1, LastIndex);
- Ptr = Builder.Insert(Gep2);
- } else {
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
- VectorParts &PtrVal = getVectorValue(Ptr);
- Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
- }
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- // Calculate the pointer for the specific unroll-part.
- Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
-
- if (Reverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
- PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
- }
-
- Value *VecPtr = Builder.CreateBitCast(PartPtr, RetTy->getPointerTo());
- Value *LI = Builder.CreateLoad(VecPtr, "wide.load");
- cast<LoadInst>(LI)->setAlignment(Alignment);
- Entry[Part] = Reverse ? reverseVector(LI) : LI;
- }
- break;
- }
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -1924,12 +2021,14 @@ void InnerLoopVectorizer::updateAnalysis() {
SE->forgetLoop(OrigLoop);
// Update the dominator tree information.
- assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) &&
+ assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
- DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock);
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
+ DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]);
+ DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back());
DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
@@ -2196,7 +2295,51 @@ void LoopVectorizationLegality::collectLoopUniforms() {
}
}
+/// Return the alias-analysis location accessed by \p Inst.
+///
+/// \p Inst is expected to be a store or a load; the location is obtained
+/// from the alias analysis held in AA.
+AliasAnalysis::Location
+LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
+  // Stores are tried first, then loads.
+  StoreInst *Store = dyn_cast<StoreInst>(Inst);
+  if (Store)
+    return AA->getLocation(Store);
+
+  LoadInst *Load = dyn_cast<LoadInst>(Inst);
+  if (Load)
+    return AA->getLocation(Load);
+
+  // Any other instruction kind is not expected here.
+  llvm_unreachable("Should be either load or store instruction");
+}
+
+/// Check whether \p Inst may alias another access recorded for \p Object.
+///
+/// Every instruction stored under \p Object in \p WriteObjects, other than
+/// \p Inst itself, is compared against \p Inst through alias analysis, with
+/// both locations resized to \p MaxByteWidth bytes.
+///
+/// \returns true as soon as one alias query yields a non-zero result, false
+/// if no recorded instruction does.
+bool
+LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
+    Value *Object,
+    Instruction *Inst,
+    AliasMultiMap& WriteObjects,
+    unsigned MaxByteWidth) {
+  // The location of the instruction under test, resized once up front.
+  AliasAnalysis::Location InstLoc =
+      getLoadStoreLocation(Inst).getWithNewSize(MaxByteWidth);
+
+  // All accesses recorded so far for this underlying object.
+  std::vector<Instruction*> &Recorded = WriteObjects[Object];
+  typedef std::vector<Instruction*>::iterator RecordedIter;
+
+  for (RecordedIter Cur = Recorded.begin(), Last = Recorded.end();
+       Cur != Last; ++Cur) {
+    Instruction *Other = *Cur;
+    // An instruction is never compared against itself.
+    if (Other == Inst)
+      continue;
+
+    AliasAnalysis::Location OtherLoc =
+        getLoadStoreLocation(Other).getWithNewSize(MaxByteWidth);
+    if (AA->alias(InstLoc, OtherLoc))
+      return true;
+  }
+
+  return false;
+}
+
bool LoopVectorizationLegality::canVectorizeMemory() {
+
+ if (TheLoop->isAnnotatedParallel()) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
@@ -2250,9 +2393,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
- // Holds the read and read-write *pointers* that we find.
- ValueVector Reads;
- ValueVector ReadWrites;
+ // Holds the read and read-write *pointers* that we find. These maps hold
+ // unique values for pointers (so no need for multi-map).
+ AliasMap Reads;
+ AliasMap ReadWrites;
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -2274,7 +2418,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// If we did *not* see this pointer before, insert it to
// the read-write list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr))
- ReadWrites.push_back(Ptr);
+ ReadWrites.insert(std::make_pair(Ptr, ST));
}
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
@@ -2289,7 +2433,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
- Reads.push_back(Ptr);
+ Reads.insert(std::make_pair(Ptr, LD));
}
// If we write (or read-write) to a single destination and there are no
@@ -2302,22 +2446,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
- for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
- if (hasComputableBounds(*I)) {
- PtrRtCheck.insert(SE, TheLoop, *I);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ AliasMap::iterator MI, ME;
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
break;
}
- for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
- if (hasComputableBounds(*I)) {
- PtrRtCheck.insert(SE, TheLoop, *I);
- DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
+ }
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *V = (*MI).first;
+ if (hasComputableBounds(V)) {
+ PtrRtCheck.insert(SE, TheLoop, V);
+ DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
break;
}
+ }
// Check that we did not collect too many pointers or found a
// unsizeable pointer.
@@ -2332,47 +2481,104 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
bool NeedRTCheck = false;
+ // Biggest vectorized access possible, vector width * unroll factor.
+ // TODO: We're being very pessimistic here, find a way to know the
+ // real access width before getting here.
+ unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
+ TTI->getMaximumUnrollFactor();
// Now that the pointers are in two lists (Reads and ReadWrites), we
// can check that there are no conflicts between each of the writes and
// between the writes to the reads.
- ValueSet WriteObjects;
+ // Note that WriteObjects duplicates the stores (indexed now by underlying
+ // objects) to avoid pointing to elements inside ReadWrites.
+ // TODO: Maybe create a new type where they can interact without duplication.
+ AliasMultiMap WriteObjects;
ValueVector TempObjects;
// Check that the read-writes do not conflict with other read-write
// pointers.
bool AllWritesIdentified = true;
- for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
- GetUnderlyingObjects(*I, TempObjects, DL);
- for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
- it != e; ++it) {
- if (!isIdentifiedObject(*it)) {
- DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
+ for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ Instruction *Inst = (*MI).second;
+
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
+ if (!isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
NeedRTCheck = true;
AllWritesIdentified = false;
}
- if (!WriteObjects.insert(*it)) {
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty()) {
+ DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
+ WriteObjects[*UI].push_back(Inst);
+ continue;
+ }
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a conflicting global value:"
+ << **UI <<"\n");
+ DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << **it <<"\n");
+ << *UI <<"\n");
return false;
}
+
+ // Didn't alias, insert into map for further reference.
+ WriteObjects[*UI].push_back(Inst);
}
TempObjects.clear();
}
/// Check that the reads don't conflict with the read-writes.
- for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
- GetUnderlyingObjects(*I, TempObjects, DL);
- for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
- it != e; ++it) {
+ for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
+ Value *Val = (*MI).first;
+ GetUnderlyingObjects(Val, TempObjects, DL);
+ for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
+ UI != UE; ++UI) {
// If all of the writes are identified then we don't care if the read
// pointer is identified or not.
- if (!AllWritesIdentified && !isIdentifiedObject(*it)) {
- DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
+ if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
NeedRTCheck = true;
}
- if (WriteObjects.count(*it)) {
- DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
- << **it <<"\n");
+
+ // Never seen it before, can't alias.
+ if (WriteObjects[*UI].empty())
+ continue;
+ // Direct alias found.
+ if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **UI <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found a global value: "
+ << **UI <<"\n");
+ Instruction *Inst = (*MI).second;
+ DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
+ DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
+
+ // If global alias, make sure they do alias.
+ if (hasPossibleGlobalWriteReorder(*UI,
+ Inst,
+ WriteObjects,
+ MaxByteWidth)) {
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
+ << *UI <<"\n");
return false;
}
}
@@ -2535,7 +2741,7 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
return IK_NoInduction;
- // Check that the PHI is consecutive and starts at zero.
+ // Check that the PHI is consecutive.
const SCEV *PhiScev = SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
@@ -2562,6 +2768,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
if (C->getValue()->equalsInt(Size))
return IK_PtrInduction;
+ else if (C->getValue()->equalsInt(0 - Size))
+ return IK_ReversePtrInduction;
return IK_NoInduction;
}
@@ -2612,18 +2820,34 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
return AR->isAffine();
}
-unsigned
+LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
unsigned UserVF) {
+ // Width 1 means no vectorize
+ VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
- return 1;
+ return Factor;
}
// Find the trip count.
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+ unsigned WidestType = getWidestType();
+ unsigned WidestRegister = TTI.getRegisterBitWidth(true);
+ unsigned MaxVectorSize = WidestRegister / WidestType;
+ DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n");
+ DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n");
+
+ if (MaxVectorSize == 0) {
+ DEBUG(dbgs() << "LV: The target has no vector registers.\n");
+ MaxVectorSize = 1;
+ }
+
+ assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
+ " into one vector!");
+
unsigned VF = MaxVectorSize;
// If we optimize the program for size, avoid creating the tail loop.
@@ -2631,7 +2855,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// If we are unable to calculate the trip count then don't try to vectorize.
if (TC < 2) {
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
- return 1;
+ return Factor;
}
// Find the maximum SIMD width that can fit within the trip count.
@@ -2644,7 +2868,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
// zero then we require a tail.
if (VF < 2) {
DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
- return 1;
+ return Factor;
}
}
@@ -2652,7 +2876,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
- return UserVF;
+ Factor.Width = UserVF;
+ return Factor;
}
float Cost = expectedCost(1);
@@ -2672,12 +2897,70 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
}
DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
- return Width;
+ Factor.Width = Width;
+ Factor.Cost = Width * Cost;
+ return Factor;
+}
+
+unsigned LoopVectorizationCostModel::getWidestType() {
+ unsigned MaxWidth = 8;
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+ BasicBlock *BB = *bb;
+
+ // For each instruction in the loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ Type *T = it->getType();
+
+ // Only examine Loads, Stores and PHINodes.
+ if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
+ continue;
+
+ // Skip PHI nodes that are not reduction variables.
+ if (PHINode *PN = dyn_cast<PHINode>(it))
+ if (!Legal->getReductionVars()->count(PN))
+ continue;
+
+ // Examine the stored values.
+ if (StoreInst *ST = dyn_cast<StoreInst>(it))
+ T = ST->getValueOperand()->getType();
+
+ // Ignore loaded pointer types and stored pointer types that are not
+ // consecutive. However, we do want to take consecutive stores/loads of
+ // pointer vectors into account.
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(it))
+ continue;
+
+ MaxWidth = std::max(MaxWidth,
+ (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ }
+ }
+
+ return MaxWidth;
}
unsigned
LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
- unsigned UserUF) {
+ unsigned UserUF,
+ unsigned VF,
+ unsigned LoopCost) {
+
+ // -- The unroll heuristics --
+ // We unroll the loop in order to expose ILP and reduce the loop overhead.
+ // There are many micro-architectural considerations that we can't predict
+ // at this level. For example frontend pressure (on decode or fetch) due to
+ // code size, or the number and capabilities of the execution ports.
+ //
+ // We use the following heuristics to select the unroll factor:
+ // 1. If the code has reductions then we unroll in order to break the cross
+ // iteration dependency.
+ // 2. If the loop is really small then we unroll in order to reduce the loop
+ // overhead.
+ // 3. We don't unroll if we think that we will spill registers to memory due
+ // to the increased register pressure.
+
// Use the user preference, unless 'auto' is selected.
if (UserUF != 0)
return UserUF;
@@ -2710,17 +2993,39 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
// fit without causing spills.
unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers;
- // We don't want to unroll the loops to the point where they do not fit into
- // the decoded cache. Assume that we only allow 32 IR instructions.
- UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
-
// Clamp the unroll factor ranges to reasonable factors.
+ unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
+
+ // If we did not calculate the cost for VF (because the user selected the VF)
+ // then we calculate the cost of VF here.
+ if (LoopCost == 0)
+ LoopCost = expectedCost(VF);
+
+ // Clamp the calculated UF to be between 1 and the max unroll factor
+ // that the target allows.
if (UF > MaxUnrollSize)
UF = MaxUnrollSize;
else if (UF < 1)
UF = 1;
- return UF;
+ if (Legal->getReductionVars()->size()) {
+ DEBUG(dbgs() << "LV: Unrolling because of reductions. \n");
+ return UF;
+ }
+
+ // We want to unroll tiny loops in order to reduce the loop overhead.
+ // We assume that the cost overhead is 1 and we use the cost model
+ // to estimate the cost of the loop and unroll until the cost of the
+ // loop overhead is about 5% of the cost of the loop.
+ DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n");
+ if (LoopCost < 20) {
+ DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n");
+ unsigned NewUF = 20/LoopCost + 1;
+ return std::min(NewUF, UF);
+ }
+
+ DEBUG(dbgs() << "LV: Not Unrolling. \n");
+ return 1;
}
LoopVectorizationCostModel::RegisterUsage
@@ -2878,9 +3183,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
- // We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
- // generate vector geps.
+ // We mark this instruction as zero-cost because the cost of GEPs in
+ // vectorized code depends on whether the corresponding memory instruction
+ // is scalarized or not. Therefore, we handle GEPs with the memory
+ // instruction cost.
return 0;
case Instruction::Br: {
return TTI.getCFInstrCost(I->getOpcode());
@@ -2923,83 +3229,59 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
- case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(I);
- Type *ValTy = SI->getValueOperand()->getType();
+ case Instruction::Store:
+ case Instruction::Load: {
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ Type *ValTy = (SI ? SI->getValueOperand()->getType() :
+ LI->getType());
VectorTy = ToVectorTy(ValTy, VF);
+ unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
+ unsigned AS = SI ? SI->getPointerAddressSpace() :
+ LI->getPointerAddressSpace();
+ Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
+ // We add the cost of address computation here instead of with the gep
+ // instruction because only here we know whether the operation is
+ // scalarized.
if (VF == 1)
- return TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
+ return TTI.getAddressComputationCost(VectorTy) +
+ TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
- // Scalarized stores.
- int Stride = Legal->isConsecutivePtr(SI->getPointerOperand());
+ // Scalarized loads/stores.
+ int Stride = Legal->isConsecutivePtr(Ptr);
bool Reverse = Stride < 0;
if (0 == Stride) {
unsigned Cost = 0;
-
// The cost of extracting from the value vector and pointer vector.
- Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+ Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
for (unsigned i = 0; i < VF; ++i) {
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
- i);
+ // The cost of extracting the pointer operand.
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
+ // In case of STORE, the cost of ExtractElement from the vector.
+ // In case of LOAD, the cost of InsertElement into the returned
+ // vector.
+ Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement :
+ Instruction::InsertElement,
+ VectorTy, i);
}
- // The cost of the scalar stores.
+ // The cost of the scalar loads/stores.
+ Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- SI->getAlignment(),
- SI->getPointerAddressSpace());
+ Alignment, AS);
return Cost;
}
- // Wide stores.
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
+ // Wide loads/stores.
+ unsigned Cost = TTI.getAddressComputationCost(VectorTy);
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
VectorTy, 0);
return Cost;
}
- case Instruction::Load: {
- LoadInst *LI = cast<LoadInst>(I);
-
- if (VF == 1)
- return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
- LI->getPointerAddressSpace());
-
- // Scalarized loads.
- int Stride = Legal->isConsecutivePtr(LI->getPointerOperand());
- bool Reverse = Stride < 0;
- if (0 == Stride) {
- unsigned Cost = 0;
- Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-
- // The cost of extracting from the pointer vector.
- for (unsigned i = 0; i < VF; ++i)
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
-
- // The cost of inserting data to the result vector.
- for (unsigned i = 0; i < VF; ++i)
- Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i);
-
- // The cost of the scalar stores.
- Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace());
- return Cost;
- }
-
- // Wide loads.
- unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
- LI->getAlignment(),
- LI->getPointerAddressSpace());
- if (Reverse)
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
- return Cost;
- }
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -3077,4 +3359,14 @@ namespace llvm {
}
}
+bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
+ // Check for a store.
+ if (StoreInst *ST = dyn_cast<StoreInst>(Inst))
+ return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
+
+ // Check for a load.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+ return false;
+}