Diffstat (limited to 'lib/Transforms')
78 files changed, 8130 insertions, 6203 deletions
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index de1353e..2bb6e90 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory(Scalar)
 add_subdirectory(IPO)
 add_subdirectory(Vectorize)
 add_subdirectory(Hello)
+add_subdirectory(ObjCARC)
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 385544a..e6fa4ed 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -514,14 +514,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Attribute - Keep track of the parameter attributes for the arguments
   // that we are *not* promoting. For the ones that we do promote, the parameter
   // attributes are lost
-  SmallVector<AttributeWithIndex, 8> AttributesVec;
+  SmallVector<AttributeSet, 8> AttributesVec;
   const AttributeSet &PAL = F->getAttributes();

   // Add any return attributes.
-  Attribute attrs = PAL.getRetAttributes();
-  if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                                                    attrs));
+  if (PAL.hasAttributes(AttributeSet::ReturnIndex))
+    AttributesVec.push_back(AttributeSet::get(F->getContext(),
+                                              PAL.getRetAttributes()));

   // First, determine the new argument list
   unsigned ArgIndex = 1;
@@ -537,9 +536,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     } else if (!ArgsToPromote.count(I)) {
       // Unchanged argument
       Params.push_back(I->getType());
-      Attribute attrs = PAL.getParamAttributes(ArgIndex);
-      if (attrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
+      AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
+      if (attrs.hasAttributes(ArgIndex)) {
+        AttrBuilder B(attrs, ArgIndex);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+      }
     } else if (I->use_empty()) {
       // Dead argument (which are always marked as promotable)
       ++NumArgumentsDead;
@@ -591,10 +593,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   }

   // Add any function attributes.
-  attrs = PAL.getFnAttributes();
-  if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                    attrs));
+  if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+    AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
+                                              PAL.getFnAttributes()));

   Type *RetTy = FTy->getReturnType();
@@ -639,10 +640,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     const AttributeSet &CallPAL = CS.getAttributes();

     // Add any return attributes.
-    Attribute attrs = CallPAL.getRetAttributes();
-    if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                                                      attrs));
+    if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
+      AttributesVec.push_back(AttributeSet::get(F->getContext(),
+                                                CallPAL.getRetAttributes()));

     // Loop over the operands, inserting GEP and loads in the caller as
     // appropriate.
@@ -653,10 +653,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
         Args.push_back(*AI);          // Unmodified argument

-        Attribute Attrs = CallPAL.getParamAttributes(ArgIndex);
-        if (Attrs.hasAttributes())
-          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
-
+        if (CallPAL.hasAttributes(ArgIndex)) {
+          AttrBuilder B(CallPAL, ArgIndex);
+          AttributesVec.
+            push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+        }
       } else if (ByValArgsToTransform.count(I)) {
         // Emit a GEP and load for each element of the struct.
         Type *AgTy = cast<PointerType>(I->getType())->getElementType();
@@ -715,16 +716,17 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     // Push any varargs arguments on the list.
     for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
       Args.push_back(*AI);
-      Attribute Attrs = CallPAL.getParamAttributes(ArgIndex);
-      if (Attrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+      if (CallPAL.hasAttributes(ArgIndex)) {
+        AttrBuilder B(CallPAL, ArgIndex);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+      }
     }

     // Add any function attributes.
-    attrs = CallPAL.getFnAttributes();
-    if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                      attrs));
+    if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+      AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+                                                CallPAL.getFnAttributes()));

     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
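The hunks above (and the parallel ones in the files that follow) migrate from the old AttributeWithIndex pairs to index-aware AttributeSet slots. A minimal sketch of the recurring rebuild step, using only calls that appear in the patch; the helper name copyParamAttr and its parameters are illustrative, not part of the change:

  // Re-key the attributes of old parameter slot OldIdx under NewIdx, the
  // parameter's position in the rebuilt function. AttrBuilder(PAL, OldIdx)
  // reads one slot; AttributeSet::get wraps it back up under the new index.
  static AttributeSet copyParamAttr(LLVMContext &C, const AttributeSet &PAL,
                                    unsigned OldIdx, unsigned NewIdx) {
    AttrBuilder B(PAL, OldIdx);
    return AttributeSet::get(C, NewIdx, B);
  }

The per-slot sets collected this way are folded back into one attribute list with AttributeSet::get(C, AttributesVec), exactly as the hunks do before installing the new list on the function or call site.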
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index ff040e7..49ef1e7 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -272,14 +272,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {

     // Drop any attributes that were on the vararg arguments.
     AttributeSet PAL = CS.getAttributes();
-    if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
-      SmallVector<AttributeWithIndex, 8> AttributesVec;
-      for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
-        AttributesVec.push_back(PAL.getSlot(i));
-      Attribute FnAttrs = PAL.getFnAttributes();
-      if (FnAttrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                        FnAttrs));
+    if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
+      SmallVector<AttributeSet, 8> AttributesVec;
+      for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
+        AttributesVec.push_back(PAL.getSlotAttributes(i));
+      if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+        AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
+                                                  PAL.getFnAttributes()));
       PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
     }
@@ -351,7 +350,7 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
   if (Fn.use_empty())
     return false;

-  llvm::SmallVector<unsigned, 8> UnusedArgs;
+  SmallVector<unsigned, 8> UnusedArgs;
   for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end();
        I != E; ++I) {
     Argument *Arg = I;
@@ -697,15 +696,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   std::vector<Type*> Params;

   // Set up to build a new list of parameter attributes.
-  SmallVector<AttributeWithIndex, 8> AttributesVec;
+  SmallVector<AttributeSet, 8> AttributesVec;
   const AttributeSet &PAL = F->getAttributes();

-  // The existing function return attributes.
-  Attribute RAttrs = PAL.getRetAttributes();
-  Attribute FnAttrs = PAL.getFnAttributes();
-
   // Find out the new return value.
-
   Type *RetTy = FTy->getReturnType();
   Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
@@ -759,22 +753,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {

   assert(NRetTy && "No new return type found?");

+  // The existing function return attributes.
+  AttributeSet RAttrs = PAL.getRetAttributes();
+
   // Remove any incompatible attributes, but only if we removed all return
   // values. Otherwise, ensure that we don't have any conflicting attributes
   // here. Currently, this should not be possible, but special handling might be
   // required when new return value attributes are added.
   if (NRetTy->isVoidTy())
     RAttrs =
-      Attribute::get(NRetTy->getContext(), AttrBuilder(RAttrs).
-                     removeAttributes(Attribute::typeIncompatible(NRetTy)));
+      AttributeSet::get(NRetTy->getContext(), AttributeSet::ReturnIndex,
+                        AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+                        removeAttributes(AttributeFuncs::
+                                         typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+                                         AttributeSet::ReturnIndex));
   else
-    assert(!AttrBuilder(RAttrs).
-             hasAttributes(Attribute::typeIncompatible(NRetTy)) &&
+    assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+             hasAttributes(AttributeFuncs::
+                           typeIncompatible(NRetTy, AttributeSet::ReturnIndex),
+                           AttributeSet::ReturnIndex) &&
            "Return attributes no longer compatible?");

-  if (RAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                                                    RAttrs));
+  if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+    AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));

   // Remember which arguments are still alive.
   SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -791,9 +792,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {

       // Get the original parameter attributes (skipping the first one, that is
       // for the return value.
-      Attribute Attrs = PAL.getParamAttributes(i + 1);
-      if (Attrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
+      if (PAL.hasAttributes(i + 1)) {
+        AttrBuilder B(PAL, i + 1);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Params.size(), B));
+      }
     } else {
       ++NumArgumentsEliminated;
       DEBUG(dbgs() << "DAE - Removing argument " << i << " (" << I->getName()
@@ -801,9 +804,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     }
   }

-  if (FnAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                    FnAttrs));
+  if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+    AttributesVec.push_back(AttributeSet::get(F->getContext(),
+                                              PAL.getFnAttributes()));

   // Reconstruct the AttributesList based on the vector we constructed.
   AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
@@ -836,15 +839,18 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     const AttributeSet &CallPAL = CS.getAttributes();

     // The call return attributes.
-    Attribute RAttrs = CallPAL.getRetAttributes();
-    Attribute FnAttrs = CallPAL.getFnAttributes();
+    AttributeSet RAttrs = CallPAL.getRetAttributes();
+
     // Adjust in case the function was changed to return void.
     RAttrs =
-      Attribute::get(NF->getContext(), AttrBuilder(RAttrs).
-                     removeAttributes(Attribute::typeIncompatible(NF->getReturnType())));
-    if (RAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                                                      RAttrs));
+      AttributeSet::get(NF->getContext(), AttributeSet::ReturnIndex,
+                        AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
+                        removeAttributes(AttributeFuncs::
+                                         typeIncompatible(NF->getReturnType(),
+                                                          AttributeSet::ReturnIndex),
+                                         AttributeSet::ReturnIndex));
+    if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
+      AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));

     // Declare these outside of the loops, so we can reuse them for the second
     // loop, which loops the varargs.
@@ -856,22 +862,26 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (ArgAlive[i]) {
         Args.push_back(*I);
         // Get original parameter attributes, but skip return attributes.
-        Attribute Attrs = CallPAL.getParamAttributes(i + 1);
-        if (Attrs.hasAttributes())
-          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+        if (CallPAL.hasAttributes(i + 1)) {
+          AttrBuilder B(CallPAL, i + 1);
+          AttributesVec.
+            push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+        }
       }

     // Push any varargs arguments on the list. Don't forget their attributes.
     for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
       Args.push_back(*I);
-      Attribute Attrs = CallPAL.getParamAttributes(i + 1);
-      if (Attrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
+      if (CallPAL.hasAttributes(i + 1)) {
+        AttrBuilder B(CallPAL, i + 1);
+        AttributesVec.
+          push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+      }
     }

-    if (FnAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                      FnAttrs));
+    if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
+      AttributesVec.push_back(AttributeSet::get(Call->getContext(),
+                                                CallPAL.getFnAttributes()));

     // Reconstruct the AttributesList based on the vector we constructed.
     AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
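RemoveDeadStuffFromFunction may rewrite a function to return void, so the patch now routes return attributes through AttributeFuncs::typeIncompatible to drop whatever no longer applies to the new type. A sketch of that step in isolation, using only the calls visible above; dropIncompatibleRetAttrs is a hypothetical name:

  // Rebuild the return slot of RAttrs without attributes that are illegal
  // on Ty (mirrors the NRetTy->isVoidTy() branch in the hunk above).
  static AttributeSet dropIncompatibleRetAttrs(LLVMContext &C, Type *Ty,
                                               AttributeSet RAttrs) {
    AttrBuilder B(RAttrs, AttributeSet::ReturnIndex);
    B.removeAttributes(AttributeFuncs::typeIncompatible(Ty,
                                              AttributeSet::ReturnIndex),
                       AttributeSet::ReturnIndex);
    return AttributeSet::get(C, AttributeSet::ReturnIndex, B);
  }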
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index e9bc4ad..a75212a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -215,14 +215,13 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
     AttrBuilder B;
     B.addAttribute(Attribute::ReadOnly)
       .addAttribute(Attribute::ReadNone);
-    F->removeAttribute(AttributeSet::FunctionIndex,
-                       Attribute::get(F->getContext(), B));
+    F->removeAttributes(AttributeSet::FunctionIndex,
+                        AttributeSet::get(F->getContext(),
+                                          AttributeSet::FunctionIndex, B));

     // Add in the new attribute.
-    B.clear();
-    B.addAttribute(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
     F->addAttribute(AttributeSet::FunctionIndex,
-                    Attribute::get(F->getContext(), B));
+                    ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);

     if (ReadsMemory)
       ++NumReadOnly;
@@ -381,7 +380,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
     for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
          A != E; ++A) {
       if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
-        A->addAttr(Attribute::get(F->getContext(), B));
+        A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
         ++NumNoCapture;
         Changed = true;
       }
@@ -396,7 +395,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
       if (!Tracker.Captured) {
         if (Tracker.Uses.empty()) {
           // If it's trivially not captured, mark it nocapture now.
-          A->addAttr(Attribute::get(F->getContext(), B));
+          A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B));
           ++NumNoCapture;
           Changed = true;
         } else {
@@ -431,7 +430,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
         ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
       ArgumentSCC[0]->
         Definition->
-        addAttr(Attribute::get(ArgumentSCC[0]->Definition->getContext(), B));
+        addAttr(AttributeSet::get(ArgumentSCC[0]->Definition->getContext(),
+                                  ArgumentSCC[0]->Definition->getArgNo() + 1,
+                                  B));
       ++NumNoCapture;
       Changed = true;
     }
@@ -473,7 +474,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {

     for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
       Argument *A = ArgumentSCC[i]->Definition;
-      A->addAttr(Attribute::get(A->getContext(), B));
+      A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
       ++NumNoCapture;
       Changed = true;
     }
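The FunctionAttrs hunks attach nocapture to individual arguments; under the new API the argument's slot index must be spelled out explicitly. A sketch of the idiom, with markNoCapture as an illustrative name:

  // Argument attribute slots are 1-based: slot 0 is the return value, so
  // argument N lives at index N + 1 - hence the getArgNo() + 1 in the patch.
  static void markNoCapture(Argument &A) {
    AttrBuilder B;
    B.addAttribute(Attribute::NoCapture);
    A.addAttr(AttributeSet::get(A.getContext(), A.getArgNo() + 1, B));
  }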
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index abd37c2..2b9d667 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -448,8 +448,8 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
       Dead[i].second->eraseFromParent();
       Instruction *I = Dead[i].first;
       do {
-        if (isAllocationFn(I, TLI))
-          break;
+        if (isAllocationFn(I, TLI))
+          break;
         Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
         if (!J)
           break;
@@ -1825,7 +1825,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
                                              GlobalValue::InternalLinkage,
                                              ConstantInt::getFalse(GV->getContext()),
                                              GV->getName()+".b",
-                                             GV->getThreadLocalMode());
+                                             GV->getThreadLocalMode(),
+                                             GV->getType()->getAddressSpace());
   GV->getParent()->getGlobalList().insert(GV, NewGV);

   Constant *InitVal = GV->getInitializer();
@@ -1845,10 +1846,10 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         bool StoringOther = SI->getOperand(0) == OtherVal;
         // Only do this if we weren't storing a loaded value.
         Value *StoreVal;
-        if (StoringOther || SI->getOperand(0) == InitVal)
+        if (StoringOther || SI->getOperand(0) == InitVal) {
           StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                       StoringOther);
-        else {
+        } else {
           // Otherwise, we are storing a previously loaded copy.  To do this,
           // change the copy from copying the original value to just copying the
           // bool.
@@ -1887,6 +1888,9 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
       UI->eraseFromParent();
   }

+  // Retain the name of the old global variable. People who are debugging their
+  // programs may expect these variables to be named the same.
+  NewGV->takeName(GV);
   GV->eraseFromParent();
   return true;
 }
@@ -1989,7 +1993,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
     return Changed;

   } else if (GS.StoredType <= GlobalStatus::isInitializerStored) {
-    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV);
+    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
     GV->setConstant(true);

     // Clean up any obviously simplifiable users now.
@@ -2067,12 +2071,12 @@ static void ChangeCalleesToFastCall(Function *F) {

 static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
-    if (!Attrs.getSlot(i).Attrs.hasAttribute(Attribute::Nest))
+    unsigned Index = Attrs.getSlotIndex(i);
+    if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
       continue;

     // There can be only one.
-    return Attrs.removeAttr(C, Attrs.getSlot(i).Index,
-                            Attribute::get(C, Attribute::Nest));
+    return Attrs.removeAttribute(C, Index, Attribute::Nest);
   }

   return Attrs;
@@ -2584,24 +2588,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
   while (1) {
     Constant *InstResult = 0;

+    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
+
     if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
-      if (!SI->isSimple()) return false;  // no volatile/atomic accesses.
+      if (!SI->isSimple()) {
+        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");
+        return false;  // no volatile/atomic accesses.
+      }
       Constant *Ptr = getVal(SI->getOperand(1));
-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
-      if (!isSimpleEnoughPointerToCommit(Ptr))
+        DEBUG(dbgs() << "; To: " << *Ptr << "\n");
+      }
+      if (!isSimpleEnoughPointerToCommit(Ptr)) {
         // If this is too complex for us to commit, reject it.
+        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store.");
         return false;
+      }

       Constant *Val = getVal(SI->getOperand(0));

       // If this might be too difficult for the backend to handle (e.g. the addr
       // of one global variable divided by another) then we can't commit it.
-      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
+      if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD)) {
+        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val
+              << "\n");
        return false;
+      }

-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
         if (CE->getOpcode() == Instruction::BitCast) {
+          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n");
           // If we're evaluating a store through a bitcast, then we need
           // to pull the bitcast off the pointer type and push it onto the
           // stored value.
@@ -2630,6 +2648,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
             // If we can't improve the situation by introspecting NewTy,
             // we have to give up.
           } else {
+            DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+                  "evaluate.\n");
             return false;
           }
         }
@@ -2637,25 +2657,36 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         // If we found compatible types, go ahead and push the bitcast
         // onto the stored value.
         Val = ConstantExpr::getBitCast(Val, NewTy);
+
+        DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");
       }
+    }

       MutatedMemory[Ptr] = Val;
     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
       InstResult = ConstantExpr::get(BO->getOpcode(),
                                      getVal(BO->getOperand(0)),
                                      getVal(BO->getOperand(1)));
+      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult
+            << "\n");
     } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
       InstResult = ConstantExpr::getCompare(CI->getPredicate(),
                                             getVal(CI->getOperand(0)),
                                             getVal(CI->getOperand(1)));
+      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult
+            << "\n");
     } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
       InstResult = ConstantExpr::getCast(CI->getOpcode(),
                                          getVal(CI->getOperand(0)),
                                          CI->getType());
+      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult
+            << "\n");
     } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
       InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
                                            getVal(SI->getOperand(1)),
                                            getVal(SI->getOperand(2)));
+      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult
+            << "\n");
     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
       Constant *P = getVal(GEP->getOperand(0));
       SmallVector<Constant*, 8> GEPOps;
@@ -2665,41 +2696,70 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
       InstResult =
         ConstantExpr::getGetElementPtr(P, GEPOps,
                                        cast<GEPOperator>(GEP)->isInBounds());
+      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult
+            << "\n");
     } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
-      if (!LI->isSimple()) return false;  // no volatile/atomic accesses.
+
+      if (!LI->isSimple()) {
+        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");
+        return false;  // no volatile/atomic accesses.
+      }
+
       Constant *Ptr = getVal(LI->getOperand(0));
-      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
         Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+        DEBUG(dbgs() << "Found a constant pointer expression, constant "
+              "folding: " << *Ptr << "\n");
+      }
       InstResult = ComputeLoadResult(Ptr);
-      if (InstResult == 0) return false; // Could not evaluate load.
+      if (InstResult == 0) {
+        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
+              "\n");
+        return false; // Could not evaluate load.
+      }
+
+      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");
     } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
-      if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.
+      if (AI->isArrayAllocation()) {
+        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");
+        return false;  // Cannot handle array allocs.
+      }
       Type *Ty = AI->getType()->getElementType();
       AllocaTmps.push_back(new GlobalVariable(Ty, false,
                                               GlobalValue::InternalLinkage,
                                               UndefValue::get(Ty),
                                               AI->getName()));
       InstResult = AllocaTmps.back();
+      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
     } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
       CallSite CS(CurInst);

       // Debug info can safely be ignored here.
       if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
+        DEBUG(dbgs() << "Ignoring debug info.\n");
         ++CurInst;
         continue;
       }

       // Cannot handle inline asm.
-      if (isa<InlineAsm>(CS.getCalledValue())) return false;
+      if (isa<InlineAsm>(CS.getCalledValue())) {
+        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");
+        return false;
+      }

       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
         if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
-          if (MSI->isVolatile()) return false;
+          if (MSI->isVolatile()) {
+            DEBUG(dbgs() << "Can not optimize a volatile memset " <<
+                  "intrinsic.\n");
+            return false;
+          }
           Constant *Ptr = getVal(MSI->getDest());
           Constant *Val = getVal(MSI->getValue());
           Constant *DestVal = ComputeLoadResult(getVal(Ptr));
           if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
             // This memset is a no-op.
+            DEBUG(dbgs() << "Ignoring no-op memset.\n");
             ++CurInst;
             continue;
           }
@@ -2707,6 +2767,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,

         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
             II->getIntrinsicID() == Intrinsic::lifetime_end) {
+          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");
           ++CurInst;
           continue;
         }
@@ -2714,8 +2775,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         if (II->getIntrinsicID() == Intrinsic::invariant_start) {
           // We don't insert an entry into Values, as it doesn't have a
           // meaningful return value.
-          if (!II->use_empty())
+          if (!II->use_empty()) {
+            DEBUG(dbgs() << "Found unused invariant_start. Cant evaluate.\n");
             return false;
+          }
           ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
           Value *PtrArg = getVal(II->getArgOperand(1));
           Value *Ptr = PtrArg->stripPointerCasts();
@@ -2723,20 +2786,30 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
             Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
             if (!Size->isAllOnesValue() &&
                 Size->getValue().getLimitedValue() >=
-                TD->getTypeStoreSize(ElemTy))
+                TD->getTypeStoreSize(ElemTy)) {
               Invariants.insert(GV);
+              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
+                    << "\n");
+            } else {
+              DEBUG(dbgs() << "Found a global var, but can not treat it as an "
+                    "invariant.\n");
+            }
           }
           // Continue even if we do nothing.
           ++CurInst;
           continue;
         }
+
+        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");
         return false;
       }

       // Resolve function pointers.
       Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
-      if (!Callee || Callee->mayBeOverridden())
+      if (!Callee || Callee->mayBeOverridden()) {
+        DEBUG(dbgs() << "Can not resolve function pointer.\n");
         return false;  // Cannot resolve.
+      }

       SmallVector<Constant*, 8> Formals;
       for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
@@ -2746,22 +2819,38 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         // If this is a function we can constant fold, do it.
         if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
           InstResult = C;
+          DEBUG(dbgs() << "Constant folded function call. Result: " <<
+                *InstResult << "\n");
         } else {
+          DEBUG(dbgs() << "Can not constant fold function call.\n");
           return false;
         }
       } else {
-        if (Callee->getFunctionType()->isVarArg())
+        if (Callee->getFunctionType()->isVarArg()) {
+          DEBUG(dbgs() << "Can not constant fold vararg function call.\n");
           return false;
+        }

-        Constant *RetVal;
+        Constant *RetVal = 0;
+
         // Execute the call, if successful, use the return value.
         ValueStack.push_back(new DenseMap<Value*, Constant*>);
-        if (!EvaluateFunction(Callee, RetVal, Formals))
+        if (!EvaluateFunction(Callee, RetVal, Formals)) {
+          DEBUG(dbgs() << "Failed to evaluate function.\n");
           return false;
+        }
         delete ValueStack.pop_back_val();
         InstResult = RetVal;
+
+        if (InstResult != NULL) {
+          DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
+                InstResult << "\n\n");
+        } else {
+          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n");
+        }
       }
     } else if (isa<TerminatorInst>(CurInst)) {
+      DEBUG(dbgs() << "Found a terminator instruction.\n");
+
       if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
         if (BI->isUnconditional()) {
           NextBB = BI->getSuccessor(0);
@@ -2787,13 +2876,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         NextBB = 0;
       } else {
         // invoke, unwind, resume, unreachable.
+        DEBUG(dbgs() << "Can not handle terminator.");
         return false;  // Cannot handle this terminator.
       }

       // We succeeded at evaluating this block!
+      DEBUG(dbgs() << "Successfully evaluated block.\n");
       return true;
     } else {
       // Did not know how to evaluate this!
+      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction."
+            "\n");
       return false;
     }

@@ -2807,6 +2900,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
     // If we just processed an invoke, we finished evaluating the block.
     if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
       NextBB = II->getNormalDest();
+      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");
       return true;
     }

@@ -2845,6 +2939,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
   while (1) {
     BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.

+    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
+
     if (!EvaluateBlock(CurInst, NextBB))
       return false;

@@ -2924,6 +3020,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
       }
       break;
     }
+    DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");

     // We cannot simplify external ctor functions.
     if (F->empty()) continue;
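Most of the GlobalOpt churn above is DEBUG(dbgs() << ...) tracing threaded through the static-constructor evaluator. For reference, a sketch of the idiom; the DEBUG_TYPE value is assumed to be the one GlobalOpt.cpp already defines:

  #define DEBUG_TYPE "globalopt"   // assumed; defined before the DEBUG uses
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"

  // DEBUG(...) compiles away entirely when NDEBUG is defined; in +Asserts
  // builds the body runs only under -debug (or -debug-only=globalopt).
  static void traceInst(const llvm::Instruction &I) {
    DEBUG(llvm::dbgs() << "Evaluating Instruction: " << I << "\n");
  }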
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 2971803..a0095da 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -30,35 +30,41 @@ using namespace llvm;

 namespace {

-  // AlwaysInliner only inlines functions that are mark as "always inline".
-  class AlwaysInliner : public Inliner {
-    InlineCostAnalyzer CA;
-  public:
-    // Use extremely low threshold.
-    AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
-      initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
-    }
-    AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000,
-                                                 InsertLifetime) {
-      initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
-    }
-    static char ID; // Pass identification, replacement for typeid
-    virtual InlineCost getInlineCost(CallSite CS);
-
-    using llvm::Pass::doInitialization;
-    using llvm::Pass::doFinalization;
-
-    virtual bool doFinalization(CallGraph &CG) {
-      return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
-    }
-    virtual bool doInitialization(CallGraph &CG);
-  };
+/// \brief Inliner pass which only handles "always inline" functions.
+class AlwaysInliner : public Inliner {
+  InlineCostAnalysis *ICA;
+
+public:
+  // Use extremely low threshold.
+  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  AlwaysInliner(bool InsertLifetime)
+      : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+    initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+
+  virtual InlineCost getInlineCost(CallSite CS);
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual bool runOnSCC(CallGraphSCC &SCC);
+
+  using llvm::Pass::doFinalization;
+  virtual bool doFinalization(CallGraph &CG) {
+    return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
+  }
+};
+
 }

 char AlwaysInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
                 "Inliner for always_inline functions", false, false)
 INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
                 "Inliner for always_inline functions", false, false)

@@ -89,15 +95,18 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
   if (Callee && !Callee->isDeclaration() &&
       Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::AlwaysInline) &&
-      CA.isInlineViable(*Callee))
+      ICA->isInlineViable(*Callee))
     return InlineCost::getAlways();

   return InlineCost::getNever();
 }

-// doInitialization - Initializes the vector of functions that have not
-// been annotated with the "always inline" attribute.
-bool AlwaysInliner::doInitialization(CallGraph &CG) {
-  CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
-  return false;
+bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) {
+  ICA = &getAnalysis<InlineCostAnalysis>();
+  return Inliner::runOnSCC(SCC);
+}
+
+void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<InlineCostAnalysis>();
+  Inliner::getAnalysisUsage(AU);
 }
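Both inliner rewrites in this patch share the same shape: the pass declares a dependency on the new InlineCostAnalysis pass and fetches it per SCC, instead of caching a DataLayout at initialization. A sketch of the pattern with a hypothetical pass name MyInliner; the calls are the ones used in the hunks above:

  void MyInliner::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<InlineCostAnalysis>(); // scheduled before this pass runs
    Inliner::getAnalysisUsage(AU);        // keep the base class requirements
  }

  bool MyInliner::runOnSCC(CallGraphSCC &SCC) {
    ICA = &getAnalysis<InlineCostAnalysis>(); // valid while this SCC runs
    return Inliner::runOnSCC(SCC);
  }

The matching INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) line registers the dependency with the pass registry.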
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 9682923..a4f7026 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -28,29 +28,41 @@ using namespace llvm;

 namespace {

-  class SimpleInliner : public Inliner {
-    InlineCostAnalyzer CA;
-  public:
-    SimpleInliner() : Inliner(ID) {
-      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
-    }
-    SimpleInliner(int Threshold) : Inliner(ID, Threshold,
-                                           /*InsertLifetime*/true) {
-      initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
-    }
-    static char ID; // Pass identification, replacement for typeid
-    InlineCost getInlineCost(CallSite CS) {
-      return CA.getInlineCost(CS, getInlineThreshold(CS));
-    }
-    using llvm::Pass::doInitialization;
-    virtual bool doInitialization(CallGraph &CG);
-  };
-}
+/// \brief Actual inliner pass implementation.
+///
+/// The common implementation of the inlining logic is shared between this
+/// inliner pass and the always inliner pass. The two passes use different cost
+/// analyses to determine when to inline.
+class SimpleInliner : public Inliner {
+  InlineCostAnalysis *ICA;
+
+public:
+  SimpleInliner() : Inliner(ID), ICA(0) {
+    initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  SimpleInliner(int Threshold)
+      : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+    initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+
+  InlineCost getInlineCost(CallSite CS) {
+    return ICA->getInlineCost(CS, getInlineThreshold(CS));
+  }
+
+  virtual bool runOnSCC(CallGraphSCC &SCC);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // end anonymous namespace

 char SimpleInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)

@@ -60,10 +72,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
   return new SimpleInliner(Threshold);
 }

-// doInitialization - Initializes the vector of functions that have been
-// annotated with the noinline attribute.
-bool SimpleInliner::doInitialization(CallGraph &CG) {
-  CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
-  return false;
+bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
+  ICA = &getAnalysis<InlineCostAnalysis>();
+  return Inliner::runOnSCC(SCC);
 }

+void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<InlineCostAnalysis>();
+  Inliner::getAnalysisUsage(AU);
+}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 2187a2a..663ddb7 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -72,6 +72,40 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {

 typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
 InlinedArrayAllocasTy;

+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
+  // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+  // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+  // clutter to the IR.
+  AttrBuilder B;
+  B.addAttribute(Attribute::StackProtect)
+    .addAttribute(Attribute::StackProtectStrong);
+  AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
+                                              AttributeSet::FunctionIndex,
+                                              B);
+  AttributeSet CallerAttr = Caller->getAttributes(),
+               CalleeAttr = Callee->getAttributes();
+
+  if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+                              Attribute::StackProtectReq)) {
+    Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller->addFnAttr(Attribute::StackProtectReq);
+  } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+                                     Attribute::StackProtectStrong) &&
+             !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::StackProtectReq)) {
+    Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller->addFnAttr(Attribute::StackProtectStrong);
+  } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+                                     Attribute::StackProtect) &&
+             !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::StackProtectReq) &&
+             !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::StackProtectStrong))
+    Caller->addFnAttr(Attribute::StackProtect);
+}
+
 /// InlineCallIfPossible - If it is possible to inline the specified call site,
 /// do so and update the CallGraph for this operation.
 ///
@@ -91,16 +125,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
   if (!InlineFunction(CS, IFI, InsertLifetime))
     return false;

-  // If the inlined function had a higher stack protection level than the
-  // calling function, then bump up the caller's stack protection level.
-  if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                           Attribute::StackProtectReq))
-    Caller->addFnAttr(Attribute::StackProtectReq);
-  else if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                                Attribute::StackProtect) &&
-           !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                                 Attribute::StackProtectReq))
-    Caller->addFnAttr(Attribute::StackProtect);
+  AdjustCallerSSPLevel(Caller, Callee);

   // Look at all of the allocas that we inlined through this call site.  If we
   // have already inlined other allocas through other calls into this function,
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 70d55b0..4bfab5b 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -50,6 +50,8 @@ namespace {
     explicit InternalizePass();
     explicit InternalizePass(ArrayRef<const char *> exportList);
     void LoadFile(const char *Filename);
+    void ClearExportList();
+    void AddToExportList(const std::string &val);
     virtual bool runOnModule(Module &M);

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -97,6 +99,14 @@ void InternalizePass::LoadFile(const char *Filename) {
   }
 }

+void InternalizePass::ClearExportList() {
+  ExternalNames.clear();
+}
+
+void InternalizePass::AddToExportList(const std::string &val) {
+  ExternalNames.insert(val);
+}
+
 bool InternalizePass::runOnModule(Module &M) {
   CallGraph *CG = getAnalysisIfAvailable<CallGraph>();
   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index b18c915..124cbb6 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils ObjCARC
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 6dc1773..47b2b51 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -214,6 +214,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
         MPM.add(createGVNPass());           // Remove redundancies
       else
         MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
+
+      // BBVectorize may have significantly shortened a loop body; unroll again.
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());
     }

   MPM.add(createAggressiveDCEPass());       // Delete dead instructions
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index d872f0c..73d9323 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -146,9 +146,11 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {

       Function *F = (*I)->getFunction();
       const AttributeSet &PAL = F->getAttributes();
-      const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0,
-                                             Attribute::get(F->getContext(),
-                                                            NewAttributes));
+      const AttributeSet &NPAL =
+        PAL.addAttributes(F->getContext(), AttributeSet::FunctionIndex,
+                          AttributeSet::get(F->getContext(),
+                                            AttributeSet::FunctionIndex,
+                                            NewAttributes));
       if (PAL != NPAL) {
         MadeChange = true;
         F->setAttributes(NPAL);
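AdjustCallerSSPLevel in the Inliner.cpp hunk implements a strength ordering among the stack-protector attributes: StackProtectReq > StackProtectStrong > StackProtect, with the caller ending up at the maximum of the two functions' levels. A sketch that makes the lattice explicit; sspLevel and its numeric ranks are illustrative only:

  // Rank the mutually exclusive SSP function attributes; inlining must
  // never lower the caller, so it keeps max(callerLevel, calleeLevel).
  static int sspLevel(const Function &F) {
    const AttributeSet &A = F.getAttributes();
    if (A.hasAttribute(AttributeSet::FunctionIndex,
                       Attribute::StackProtectReq))
      return 3;
    if (A.hasAttribute(AttributeSet::FunctionIndex,
                       Attribute::StackProtectStrong))
      return 2;
    if (A.hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtect))
      return 1;
    return 0;
  }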
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 959daa2..1f6a3a5e 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -27,7 +27,7 @@ namespace llvm {
 class DbgDeclareInst;
 class MemIntrinsic;
 class MemSetInst;
-  
+
 /// SelectPatternFlavor - We can match a variety of different patterns for
 /// select operations.
 enum SelectPatternFlavor {
@@ -36,7 +36,7 @@ enum SelectPatternFlavor {
   SPF_SMAX, SPF_UMAX
   //SPF_ABS - TODO.
 };
-  
+
 /// getComplexity:  Assign a complexity or rank value to LLVM Values...
 /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static inline unsigned getComplexity(Value *V) {
@@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) {
   return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
 }
-  
+
 /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
 /// just like the normal insertion helper, but also adds any new instructions
 /// to the instcombine worklist.
-class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter 
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
     : public IRBuilderDefaultInserter<true> {
   InstCombineWorklist &Worklist;
 public:
   InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
-  
+
   void InsertHelper(Instruction *I, const Twine &Name,
                     BasicBlock *BB, BasicBlock::iterator InsertPt) const {
     IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
     Worklist.Add(I);
   }
 };
-  
+
 /// InstCombiner - The -instcombine pass.
 class LLVM_LIBRARY_VISIBILITY InstCombiner
                              : public FunctionPass,
@@ -85,7 +85,7 @@ public:
   /// instructions into the worklist when they are created.
   typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
   BuilderTy *Builder;
-  
+
   static char ID; // Pass identification, replacement for typeid
   InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {
     MinimizeSize = false;
@@ -94,7 +94,7 @@ public:

 public:
   virtual bool runOnFunction(Function &F);
-  
+
   bool DoOneIteration(Function &F, unsigned ItNum);

   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -211,11 +211,11 @@ public:
 private:
   bool ShouldChangeType(Type *From, Type *To) const;
   Value *dyn_castNegVal(Value *V) const;
-  Value *dyn_castFNegVal(Value *V) const;
-  Type *FindElementAtOffset(Type *Ty, int64_t Offset, 
+  Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+  Type *FindElementAtOffset(Type *Ty, int64_t Offset,
                             SmallVectorImpl<Value*> &NewIndices);
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
-  
+
   /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
   /// results in any code being generated and is interesting to optimize out. If
   /// the cast can be eliminated by some other simple transformation, we prefer
@@ -247,7 +247,7 @@ public:
     return New;
   }

-  // InsertNewInstWith - same as InsertNewInstBefore, but also sets the 
+  // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
   // debug loc.
   //
   Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
@@ -263,10 +263,10 @@ public:
   //
   Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
     Worklist.AddUsersToWorkList(I);   // Add all modified instrs to worklist.
-    
+
     // If we are replacing the instruction with itself, this must be in a
     // segment of unreachable code, so just clobber the instruction.
-    if (&I == V) 
+    if (&I == V)
       V = UndefValue::get(I.getType());

     DEBUG(errs() << "IC: Replacing " << I << "\n"
@@ -296,13 +296,13 @@ public:
     MadeIRChange = true;
     return 0;  // Don't do anything with FI
   }
-      
+
   void ComputeMaskedBits(Value *V, APInt &KnownZero,
                          APInt &KnownOne, unsigned Depth = 0) const {
     return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
   }
-  
-  bool MaskedValueIsZero(Value *V, const APInt &Mask, 
+
+  bool MaskedValueIsZero(Value *V, const APInt &Mask,
                          unsigned Depth = 0) const {
     return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
   }
@@ -325,10 +325,10 @@ private:

   /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
   /// based on the demanded bits.
-  Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, 
+  Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
                                  APInt& KnownZero, APInt& KnownOne,
                                  unsigned Depth);
-  bool SimplifyDemandedBits(Use &U, APInt DemandedMask, 
+  bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
                             APInt& KnownZero, APInt& KnownOne,
                             unsigned Depth=0);

   /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
@@ -336,15 +336,15 @@ private:
   Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
                                     APInt DemandedMask, APInt &KnownZero,
                                     APInt &KnownOne);
-  
+
   /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
   /// SimplifyDemandedBits knows about. See if the instruction has any
   /// properties that allow us to simplify its operands.
   bool SimplifyDemandedInstructionBits(Instruction &Inst);
-  
+
   Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                     APInt& UndefElts, unsigned Depth = 0);
-    
+
   // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
   // which has a PHI node as operand #0, see if we can fold the instruction
   // into the PHI (which is only possible if all operands to the PHI are
@@ -360,10 +360,10 @@ private:

   Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
   Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-  
+
   Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
                         ConstantInt *AndRHS, BinaryOperator &TheAnd);
-  
+
   Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
                             bool isSub, Instruction &I);
   Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
@@ -382,8 +382,8 @@ private:
   Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);

 };
-      
-  
+
+
 } // end namespace llvm.

 #endif
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f07c58d..c6d60d6 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -66,10 +66,12 @@ namespace {
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
     APFloat *getFpValPtr(void)
       { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+    const APFloat *getFpValPtr(void) const
+      { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }

     const APFloat &getFpVal(void) const {
       assert(IsFp && BufHasFpVal && "Incorret state");
-      return *reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]);
+      return *getFpValPtr();
     }

     APFloat &getFpVal(void)
@@ -1248,6 +1250,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {

     if (SimplifyDemandedInstructionBits(I))
       return &I;
+
+    // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
+    if (C->isZero() && match(Op1, m_ZExt(m_Value(X))))
+      if (X->getType()->isIntegerTy(1))
+        return CastInst::CreateSExtOrBitCast(X, Op1->getType());
+
+    // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
+    if (C->isZero() && match(Op1, m_SExt(m_Value(X))))
+      if (X->getType()->isIntegerTy(1))
+        return CastInst::CreateZExtOrBitCast(X, Op1->getType());
   }
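The visitSub hunk rests on a small identity over booleans, worked out here as a comment-only illustration (not part of the patch):

  // For a boolean X (i1) widened to an integer type B:
  //   zext X -> 0 or 1,  so  sub 0, (zext X) -> 0 or -1, which is sext X;
  //   sext X -> 0 or -1, so  sub 0, (sext X) -> 0 or 1,  which is zext X.
  // e.g.  %n = sub i32 0, (zext i1 %b to i32)  ==>  %n = sext i1 %b to i32

The fold is expressed with the PatternMatch helpers (match, m_ZExt, m_SExt, m_Value) that InstCombine already uses throughout.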
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c1e60d4..4332467 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1245,6 +1245,34 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
     }
   }

+  {
+    Value *X = 0;
+    bool OpsSwapped = false;
+    // Canonicalize SExt or Not to the LHS
+    if (match(Op1, m_SExt(m_Value())) ||
+        match(Op1, m_Not(m_Value()))) {
+      std::swap(Op0, Op1);
+      OpsSwapped = true;
+    }
+
+    // Fold (and (sext bool to A), B) --> (select bool, B, 0)
+    if (match(Op0, m_SExt(m_Value(X))) &&
+        X->getType()->getScalarType()->isIntegerTy(1)) {
+      Value *Zero = Constant::getNullValue(Op1->getType());
+      return SelectInst::Create(X, Op1, Zero);
+    }
+
+    // Fold (and ~(sext bool to A), B) --> (select bool, 0, B)
+    if (match(Op0, m_Not(m_SExt(m_Value(X)))) &&
+        X->getType()->getScalarType()->isIntegerTy(1)) {
+      Value *Zero = Constant::getNullValue(Op0->getType());
+      return SelectInst::Create(X, Zero, Op1);
+    }
+
+    if (OpsSwapped)
+      std::swap(Op0, Op1);
+  }
+
   return Changed ? &I : 0;
 }

@@ -2043,6 +2071,20 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       return BinaryOperator::CreateOr(Inner, C1);
   }

+  // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
+  // Since this OR statement hasn't been optimized further yet, we hope
+  // that this transformation will allow the new ORs to be optimized.
+  {
+    Value *X = 0, *Y = 0;
+    if (Op0->hasOneUse() && Op1->hasOneUse() &&
+        match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
+        match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
+      Value *orTrue = Builder->CreateOr(A, C);
+      Value *orFalse = Builder->CreateOr(B, D);
+      return SelectInst::Create(X, orTrue, orFalse);
+    }
+  }
+
   return Changed ? &I : 0;
 }
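The new visitAnd block uses the same observation from the other side: sext of an i1 is an all-zeros or all-ones mask, so masking B with it is really a select. As a comment-only worked example in the spirit of the hunk above (not from the patch):

  // sext X is 0 when X is false and ~0 when X is true, therefore:
  //   and (sext X), B   ==  X ? B : 0  ==  select X, B, 0
  //   and ~(sext X), B  ==  X ? 0 : B  ==  select X, 0, B
  // The getScalarType()->isIntegerTy(1) guard lets the fold apply to
  // vectors of i1 as well as scalar i1.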
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d17879b..64cd1bd 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1014,8 +1014,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       return false;   // Cannot transform this return value.

     if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
-      AttrBuilder RAttrs = CallerPAL.getRetAttributes();
-      if (RAttrs.hasAttributes(Attribute::typeIncompatible(NewRetTy)))
+      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+      if (RAttrs.
+          hasAttributes(AttributeFuncs::
+                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+                        AttributeSet::ReturnIndex))
         return false;   // Attribute not compatible with transformed value.
     }
@@ -1044,14 +1047,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if (!CastInst::isCastable(ActTy, ParamTy))
       return false;   // Cannot transform this parameter value.

-    Attribute Attrs = CallerPAL.getParamAttributes(i + 1);
-    if (AttrBuilder(Attrs).
-          hasAttributes(Attribute::typeIncompatible(ParamTy)))
+    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
+          hasAttributes(AttributeFuncs::
+                        typeIncompatible(ParamTy, i + 1), i + 1))
       return false;   // Attribute not compatible with transformed value.

     // If the parameter is passed as a byval argument, then we have to have a
     // sized type and the sized type has to have the same size as the old type.
-    if (ParamTy != ActTy && Attrs.hasAttribute(Attribute::ByVal)) {
+    if (ParamTy != ActTy &&
+        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
+                                                         Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
       if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
         return false;
@@ -1100,11 +1105,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // won't be dropping them.  Check that these extra arguments have attributes
   // that are compatible with being a vararg call argument.
   for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
-    if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+    unsigned Index = CallerPAL.getSlotIndex(i - 1);
+    if (Index <= FT->getNumParams())
       break;
-    Attribute PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+
     // Check if it has an attribute that's incompatible with varargs.
-    if (PAttrs.hasAttribute(Attribute::StructRet))
+    AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+    if (PAttrs.hasAttribute(Index, Attribute::StructRet))
       return false;
   }

@@ -1113,21 +1120,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // inserting cast instructions as necessary.
   std::vector<Value*> Args;
   Args.reserve(NumActualArgs);
-  SmallVector<AttributeWithIndex, 8> attrVec;
+  SmallVector<AttributeSet, 8> attrVec;
   attrVec.reserve(NumCommonArgs);

   // Get any return attributes.
-  AttrBuilder RAttrs = CallerPAL.getRetAttributes();
+  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

   // If the return value is not being used, the type may not be compatible
   // with the existing attributes.  Wipe out any problematic attributes.
-  RAttrs.removeAttributes(Attribute::typeIncompatible(NewRetTy));
+  RAttrs.
+    removeAttributes(AttributeFuncs::
+                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
+                     AttributeSet::ReturnIndex);

   // Add the new return attributes.
   if (RAttrs.hasAttributes())
-    attrVec.push_back(
-      AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                              Attribute::get(FT->getContext(), RAttrs)));
+    attrVec.push_back(AttributeSet::get(Caller->getContext(),
+                                        AttributeSet::ReturnIndex, RAttrs));

   AI = CS.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1141,9 +1150,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     }

     // Add any parameter attributes.
-    Attribute PAttrs = CallerPAL.getParamAttributes(i + 1);
+    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
     if (PAttrs.hasAttributes())
-      attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
+                                          PAttrs));
   }

   // If the function takes more arguments than the call was taking, add them
@@ -1168,23 +1178,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       }

       // Add any parameter attributes.
-      Attribute PAttrs = CallerPAL.getParamAttributes(i + 1);
+      AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
       if (PAttrs.hasAttributes())
-        attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
+        attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
+                                            PAttrs));
     }
   }

-  Attribute FnAttrs = CallerPAL.getFnAttributes();
-  if (FnAttrs.hasAttributes())
-    attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                              FnAttrs));
+  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
+    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.

   const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
-                                                        attrVec);
+                                                       attrVec);

   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1262,12 +1272,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
     if (!NestAttrs.isEmpty()) {
       unsigned NestIdx = 1;
       Type *NestTy = 0;
-      Attribute NestAttr;
+      AttributeSet NestAttr;

       // Look for a parameter marked with the 'nest' attribute.
       for (FunctionType::param_iterator I = NestFTy->param_begin(),
            E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
-        if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attribute::Nest)){
+        if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
           // Record the parameter type and any other attributes.
           NestTy = *I;
           NestAttr = NestAttrs.getParamAttributes(NestIdx);
@@ -1279,17 +1289,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       std::vector<Value*> NewArgs;
       NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);

-      SmallVector<AttributeWithIndex, 8> NewAttrs;
+      SmallVector<AttributeSet, 8> NewAttrs;
       NewAttrs.reserve(Attrs.getNumSlots() + 1);

       // Insert the nest argument into the call argument list, which may
       // mean appending it.  Likewise for attributes.

       // Add any result attributes.
-      Attribute Attr = Attrs.getRetAttributes();
-      if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
-                                                   Attr));
+      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
+        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                             Attrs.getRetAttributes()));

       {
         unsigned Idx = 1;
@@ -1301,7 +1310,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
             if (NestVal->getType() != NestTy)
               NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
             NewArgs.push_back(NestVal);
-            NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr));
+            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                                 NestAttr));
           }

           if (I == E)
@@ -1309,20 +1319,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,

           // Add the original argument and attributes.
           NewArgs.push_back(*I);
-          Attr = Attrs.getParamAttributes(Idx);
-          if (Attr.hasAttributes())
-            NewAttrs.push_back
-              (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+          AttributeSet Attr = Attrs.getParamAttributes(Idx);
+          if (Attr.hasAttributes(Idx)) {
+            AttrBuilder B(Attr, Idx);
+            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
+                                                 Idx + (Idx >= NestIdx), B));
+          }

           ++Idx, ++I;
         } while (1);
       }

       // Add any function attributes.
-      Attr = Attrs.getFnAttributes();
-      if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
-                                                   Attr));
+      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
+                                             Attrs.getFnAttributes()));

       // The trampoline may have been bitcast to a bogus type (FTy).
       // Handle this by synthesizing a new function type, equal to FTy
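transformConstExprCastCall now iterates attribute slots through the getSlotIndex/getSlotAttributes accessor pair that replaces the removed getSlot struct. A sketch of the iteration idiom built only from calls in the patch; anySlotHas is a hypothetical helper:

  // Walk every slot of PAL; a slot pairs an index (a 1-based parameter
  // position, or ReturnIndex/FunctionIndex) with that index's attributes.
  static bool anySlotHas(const AttributeSet &PAL, Attribute::AttrKind Kind) {
    for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
      unsigned Index = PAL.getSlotIndex(i);
      if (PAL.getSlotAttributes(i).hasAttribute(Index, Kind))
        return true;
    }
    return false;
  }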
unsigned SubScale; - Value *SubVal = + Value *SubVal = DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; @@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (!TD) return 0; PointerType *PTy = cast<PointerType>(CI.getType()); - + BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); @@ -110,7 +110,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - + // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || @@ -125,17 +125,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Insert before the alloca, not before the cast. Amt = AllocaBuilder.CreateMul(Amt, NumElements); } - + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off); } - + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); - + // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. @@ -148,10 +148,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that +/// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -181,7 +181,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; - } + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -190,7 +190,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - + // Otherwise, must be the same type of cast, so just reinsert a new one. // This also handles the case of zext(trunc(x)) -> zext(x). Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, @@ -212,11 +212,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Res = NPN; break; } - default: + default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); } - + Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -224,7 +224,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. 
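Before the wrapper itself, one classic eliminable pair for intuition, modeled in plain C++ (the values are illustrative):

#include <cstdint>
#include <cstdio>

// zext i8 -> i32 followed by trunc i32 -> i16 collapses to zext i8 -> i16:
// both routes produce the same zero-extended 16-bit value.
int main() {
  uint8_t X = 0xAB;
  uint16_t PairWise = (uint16_t)(uint32_t)X; // zext then trunc
  uint16_t Direct   = (uint16_t)X;           // the single collapsed cast
  std::printf("%d\n", PairWise == Direct);   // prints 1
  return 0;
}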
-static Instruction::CastOps +static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction @@ -253,7 +253,7 @@ isEliminableCastPair( if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; - + return Instruction::CastOps(Res); } @@ -265,18 +265,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - + // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) if (isEliminableCastPair(CI, opc, Ty, TD)) return false; - + // If this is a vector sext from a compare, then we don't want to break the // idiom where each element of the extended vector is either zero or all ones. if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) return false; - + return true; } @@ -288,7 +288,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast - if (Instruction::CastOps opc = + if (Instruction::CastOps opc = isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. @@ -311,7 +311,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - + return 0; } @@ -330,15 +330,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + Type *OrigTy = V->getType(); - + // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. - if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && I->getOperand(0)->getType() == Ty) return true; @@ -423,29 +423,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); - + // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { - + // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. 
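A sketch of what that win looks like when the expression is an add (hypothetical standalone IRBuilder usage, not code from the patch): every operand is narrowed, so the trailing trunc vanishes.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// trunc(add i64 %x, %y) to i32 is rebuilt as add i32 (trunc %x), (trunc %y):
// the whole expression is evaluated directly in the destination type.
static Value *narrowAdd(IRBuilder<> &B, Value *X, Value *Y, Type *NarrowTy) {
  Value *NX = B.CreateTrunc(X, NarrowTy);
  Value *NY = B.CreateTrunc(Y, NarrowTy);
  return B.CreateAdd(NX, NY, "narrow.add"); // no final trunc needed
}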
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -462,7 +462,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; if (Src->hasOneUse() && @@ -472,7 +472,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // ASize < MidSize and MidSize > ResultSize, but don't know the relation // between ASize and ResultSize. unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - + // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) @@ -485,7 +485,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } - + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && @@ -508,7 +508,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // cast to integer to avoid the comparison. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { const APInt &Op1CV = Op1C->getValue(); - + // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || @@ -538,14 +538,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); - + APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoXform) return ICI; @@ -559,7 +559,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } - + uint32_t ShiftAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); if (ShiftAmt) { @@ -568,12 +568,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), In->getName()+".lobit"); } - + if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } - + if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); @@ -646,19 +646,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If the input is a truncate from the destination type, we can trivially // eliminate it. 
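The trivial elimination mentioned above has a simple scalar reading; a quick plain-C++ check with an illustrative width (the range check itself follows in the code):

#include <cstdint>
#include <cstdio>

// zext(trunc x) back to the original type is just a mask of the low bits:
// (uint32_t)(uint8_t)x == (x & 0xFF) for every x.
int main() {
  uint32_t X = 0xDEADBEEFu;
  std::printf("%d\n", (uint32_t)(uint8_t)X == (X & 0xFFu)); // prints 1
  return 0;
}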
if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + unsigned Opc = I->getOpcode(), Tmp; switch (Opc) { case Instruction::ZExt: // zext(zext(x)) -> zext(x). @@ -678,7 +678,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) return true; - + // If the operation is an AND/OR/XOR and the bits to clear are zero in the // other side, BitsToClear is ok. if (Tmp == 0 && @@ -691,10 +691,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { APInt::getHighBitsSet(VSize, BitsToClear))) return true; } - + // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - + case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. @@ -716,7 +716,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Tmp != BitsToClear) return false; return true; - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -739,48 +739,48 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { } Instruction *InstCombiner::visitZExt(ZExtInst &CI) { - // If this zero extend is only used by a truncate, let the truncate by + // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) return Result; - // See if we can simplify any instructions used by the input whose sole + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); - + // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); - + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // If the high bits are already filled with zeros, just replace this // cast with the result. if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, DestBitSize-SrcBitsKept))) return ReplaceInstUsesWith(CI, Res); - + // We need to emit an AND to clear the high bits. 
Constant *C = ConstantInt::get(Res->getType(), APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); @@ -792,7 +792,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // 'and' which will be much cheaper than the pair of casts. if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast // TODO: Subsume this into EvaluateInDifferentType. - + // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); @@ -809,7 +809,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } - + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize > DstSize) { Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), AndValue)); } @@ -876,7 +876,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - + return 0; } @@ -989,14 +989,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // If this is a constant, it can be trivially promoted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is a truncate from the dest type, we can trivially eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; @@ -1015,14 +1015,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // These operators can all arbitrarily be extended if their inputs can. return CanEvaluateSExtd(I->getOperand(0), Ty) && CanEvaluateSExtd(I->getOperand(1), Ty); - + //case Instruction::Shl: TODO //case Instruction::LShr: TODO - + case Instruction::Select: return CanEvaluateSExtd(I->getOperand(1), Ty) && CanEvaluateSExtd(I->getOperand(2), Ty); - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -1036,24 +1036,24 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitSExt(SExtInst &CI) { - // If this sign extend is only used by a truncate, let the truncate by - // eliminated before we try to optimize this zext. + // If this sign extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + if (Instruction *I = commonCastTransforms(CI)) return I; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); @@ -1076,7 +1076,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // cast with the result. if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), @@ -1089,7 +1089,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); @@ -1125,7 +1125,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } - + return 0; } @@ -1147,7 +1147,7 @@ static Value *LookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0)); - + // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. @@ -1166,14 +1166,14 @@ static Value *LookThroughFPExtensions(Value *V) { return V; // Don't try to shrink to various long double types. } - + return V; } Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing // the add as the smaller type. 
This applies to fadd/fsub/fmul/fdiv as well @@ -1190,7 +1190,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && + if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of @@ -1202,10 +1202,36 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } - break; + break; + } + + // (fptrunc (fneg x)) -> (fneg (fptrunc x)) + if (BinaryOperator::isFNeg(OpI)) { + Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), + CI.getType()); + return BinaryOperator::CreateFNeg(InnerTrunc); + } + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0)); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = + Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), + II->getIntrinsicID(), IntrinsicType); + + Value *Args[] = { InnerTrunc }; + return CallInst::Create(Overload, Args, II->getName()); + } } } - + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && @@ -1220,7 +1246,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1228,15 +1254,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); - - + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); EraseInstFromFunction(*Call); return ret; } } - + return 0; } @@ -1254,7 +1280,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ @@ -1268,19 +1294,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); if (OpI == 0) return commonCastTransforms(FI); - + // fptosi(sitofp(X)) --> X // fptosi(uitofp(X)) --> X // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. 
This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - + return commonCastTransforms(FI); } @@ -1296,21 +1322,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - if (TD) { - if (CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } - if (CI.getOperand(0)->getType()->getScalarSizeInBits() < - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); } - + if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1320,19 +1341,19 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! if (GEP->hasAllZeroIndices()) { // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another + // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } - + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other @@ -1353,15 +1374,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Builder->CreateInBoundsGEP(OrigBase, NewIndices) : Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); - + if (isa<BitCastInst>(CI)) return new BitCastInst(NGEP, CI.getType()); assert(isa<PtrToIntInst>(CI)); return new PtrToIntInst(NGEP, CI.getType()); - } + } } } - + return commonCastTransforms(CI); } @@ -1369,19 +1390,15 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. 
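The unified replacement shown in the hunk that follows can be summarized by a small hypothetical helper: CreateZExtOrTrunc picks zext or trunc from the relative widths, and a vector destination only changes the intermediate type.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch: canonicalize ptrtoint to a non-pointer-sized integer as
// ptrtoint-to-intptr_t plus one integer cast, exposing the pointer-sized
// value to other transforms. Illustrative only.
static Value *canonicalPtrToInt(IRBuilder<> &B, Value *Ptr, Type *IntPtrTy,
                                Type *DestTy) {
  Value *P = B.CreatePtrToInt(Ptr, IntPtrTy);
  return B.CreateZExtOrTrunc(P, DestTy);
}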
- if (TD) { - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new TruncInst(P, CI.getType()); - } - if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new ZExtInst(P, CI.getType()); - } + if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); + return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); } - + return commonPointerCastTransforms(CI); } @@ -1396,33 +1413,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. VectorType *SrcTy = cast<VectorType>(InVal->getType()); - + if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the // same size. There is no specific reason we couldn't handle things like // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten - // there yet. + // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) return 0; - + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); } - + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - + if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) ShuffleMask.push_back(i); - + } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros @@ -1436,7 +1453,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) ShuffleMask.push_back(SrcElts); } - + return new ShuffleVectorInst(InVal, V2, ConstantDataVector::get(V2->getContext(), ShuffleMask)); @@ -1463,7 +1480,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; - + // If we got down to a value of the right type, we win, try inserting into the // right element. if (V->getType() == VecEltTy) { @@ -1471,15 +1488,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (Constant *C = dyn_cast<Constant>(V)) if (C->isNullValue()) return true; - + // Fail if multiple elements are inserted into this slot. 
if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) return false; - + Elements[ElementIndex] = V; return true; } - + if (Constant *C = dyn_cast<Constant>(V)) { // Figure out the # elements this provides, and bitcast it or slice it up // as required. @@ -1490,7 +1507,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), ElementIndex, Elements, VecEltTy); - + // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. if (!isa<IntegerType>(C->getType())) @@ -1498,7 +1515,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); - + for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), i*ElementSize)); @@ -1508,23 +1525,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, } return true; } - + if (!V->hasOneUse()) return false; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::Or: return CollectInsertionElements(I->getOperand(0), ElementIndex, Elements, VecEltTy) && @@ -1536,11 +1553,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (CI == 0) return false; if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, Elements, VecEltTy); } - + } } @@ -1575,11 +1592,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] == 0) continue; // Unset element. 
- + Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); } - + return Result; } @@ -1607,11 +1624,11 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); } } - + // bitcast(trunc(lshr(bitcast(somevector), cst)) ConstantInt *ShAmt = 0; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), @@ -1628,7 +1645,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + unsigned Elt = ShAmt->getZExtValue() / DestWidth; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } @@ -1652,12 +1669,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { PointerType *SrcPTy = cast<PointerType>(SrcTy); Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - + // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1665,14 +1682,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; - + // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; - while (SrcElTy != DstElTy && + while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); @@ -1685,7 +1702,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - + // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) @@ -1698,7 +1715,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - + if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all @@ -1711,7 +1728,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { cast<VectorType>(DestTy), *this)) return I; } - + // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. 
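For reference, the scalar pattern that comment targets, spelled out in plain C++ (lane width chosen for illustration):

#include <cstdint>

// Packing two 32-bit lanes with shift+or is the integer spelling of two
// insertelements into a <2 x i32> followed by a bitcast to i64; the code
// above recovers the vector form from exactly this kind of expression.
static uint64_t packLanes(uint32_t Lo, uint32_t Hi) {
  return (uint64_t)Lo | ((uint64_t)Hi << 32);
}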
@@ -1721,18 +1738,29 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = - Builder->CreateExtractElement(Src, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - return CastInst::Create(Instruction::BitCast, Elem, DestTy); + if (SrcVTy->getNumElements() == 1) { + // If our destination is not a vector, then make this a straight + // scalar-scalar cast. + if (!DestTy->isVectorTy()) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } + + // Otherwise, see if our source is an insert. If so, then use the scalar + // component directly. + if (InsertElementInst *IEI = + dyn_cast<InsertElementInst>(CI.getOperand(0))) + return CastInst::Create(Instruction::BitCast, IEI->getOperand(1), + DestTy); } } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1741,9 +1769,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. - if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); @@ -1753,7 +1781,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } } - + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 40e559e..bad46b4 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1331,6 +1331,25 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(And->getType())); } + + // Transform (icmp pred iM (shl iM %v, N), CI) + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // This enables us to get rid of the shift in favor of a trunc which can be + // free on the target. It has the additional benefit of comparing to a + // smaller constant, which will be target friendly. 
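One concrete instance of that rewrite, checked by brute force in plain C++ (the constants are illustrative): with M = 32, N = 8 and CI = 0x1200, the compare narrows to i24 against 0x12.

#include <cstdint>
#include <cstdio>

// (icmp eq i32 (shl i32 %v, 8), 0x1200) becomes
// (icmp eq i24 (trunc i32 %v to i24), 0x12): the shift discards the top
// 8 bits of %v, and the RHS has at least 8 trailing zeros.
int main() {
  for (uint32_t V = 0; V < (1u << 25); ++V) {
    bool Wide   = (V << 8) == 0x1200u;
    bool Narrow = (V & 0x00FFFFFFu) == 0x12u; // the i24 view of %v
    if (Wide != Narrow) { std::printf("mismatch at %u\n", V); return 1; }
  }
  std::printf("equivalent\n");
  return 0;
}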
+ unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); + if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); + Constant *NCI = ConstantExpr::getTrunc( + ConstantExpr::getAShr(RHS, + ConstantInt::get(RHS->getType(), Amt)), + NTy); + return new ICmpInst(ICI.getPredicate(), + Builder->CreateTrunc(LHSI->getOperand(0), NTy), + NCI); + } + break; } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d0f4392..8e4267f 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -377,6 +377,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD)) return ReplaceInstUsesWith(I, V); + bool AllowReassociate = I.hasUnsafeAlgebra(); + // Simplify mul instructions with a constant RHS. if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. @@ -389,7 +391,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return NV; ConstantFP *C = dyn_cast<ConstantFP>(Op1); - if (C && I.hasUnsafeAlgebra() && C->getValueAPF().isNormal()) { + if (C && AllowReassociate && C->getValueAPF().isNormal()) { // Let MDC denote an expression in one of these forms: // X * C, C/X, X/C, where C is a constant. // @@ -430,7 +432,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { BinaryOperator::CreateFAdd(M0, M1) : BinaryOperator::CreateFSub(M0, M1); Instruction *RI = cast<Instruction>(R); - RI->setHasUnsafeAlgebra(true); + RI->copyFastMathFlags(&I); return RI; } } @@ -438,9 +440,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); // Under unsafe algebra do: // X * log2(0.5*Y) = X*log2(Y) - X @@ -469,36 +468,66 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } - // X * cond ? 1.0 : 0.0 => cond ? 
X : 0.0 - if (I.hasNoNaNs() && I.hasNoSignedZeros()) { - Value *V0 = I.getOperand(0); - Value *V1 = I.getOperand(1); - Value *Cond, *SLHS, *SRHS; - bool Match = false; - - if (match(V0, m_Select(m_Value(Cond), m_Value(SLHS), m_Value(SRHS)))) { - Match = true; - } else if (match(V1, m_Select(m_Value(Cond), m_Value(SLHS), - m_Value(SRHS)))) { - Match = true; - std::swap(V0, V1); + // Handle the symmetric situation in a 2-iteration loop + Value *Opnd0 = Op0; + Value *Opnd1 = Op1; + for (int i = 0; i < 2; i++) { + bool IgnoreZeroSign = I.hasNoSignedZeros(); + if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); + Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); + + // -X * -Y => X*Y + if (N1) + return BinaryOperator::CreateFMul(N0, N1); + + if (Opnd0->hasOneUse()) { + // -X * Y => -(X*Y) (Promote negation as high as possible) + Value *T = Builder->CreateFMul(N0, Opnd1); + cast<Instruction>(T)->setDebugLoc(I.getDebugLoc()); + Instruction *Neg = BinaryOperator::CreateFNeg(T); + if (I.getFastMathFlags().any()) { + cast<Instruction>(T)->copyFastMathFlags(&I); + Neg->copyFastMathFlags(&I); + } + return Neg; + } } - if (Match) { - ConstantFP *C0 = dyn_cast<ConstantFP>(SLHS); - ConstantFP *C1 = dyn_cast<ConstantFP>(SRHS); - - if (C0 && C1 && - ((C0->isZero() && C1->isExactlyValue(1.0)) || - (C1->isZero() && C0->isExactlyValue(1.0)))) { - Value *T; - if (C0->isZero()) - T = Builder->CreateSelect(Cond, SLHS, V1); - else - T = Builder->CreateSelect(Cond, V1, SRHS); - return ReplaceInstUsesWith(I, T); + // (X*Y) * X => (X*X) * Y where Y != X + // The purpose is two-fold: + // 1) to form a power expression (of X). + // 2) potentially shorten the critical path: After transformation, the + // latency of the instruction Y is amortized by the expression of X*X, + // and therefore Y is in a "less critical" position compared to what it + // was before the transformation. + // + if (AllowReassociate) { + Value *Opnd0_0, *Opnd0_1; + if (Opnd0->hasOneUse() && + match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { + Value *Y = 0; + if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) + Y = Opnd0_1; + else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) + Y = Opnd0_0; + + if (Y) { + Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1)); + T->copyFastMathFlags(&I); + T->setDebugLoc(I.getDebugLoc()); + + Instruction *R = BinaryOperator::CreateFMul(T, Y); + R->copyFastMathFlags(&I); + return R; + } } } + + if (!isa<Constant>(Op1)) + std::swap(Opnd0, Opnd1); + else + break; } return Changed ? &I : 0; @@ -784,21 +813,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return 0; } +/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a special +/// FP value and: +/// 1) 1/C is exact, or +/// 2) reciprocal is allowed. +/// If the conversion was successful, the simplified expression "X * 1/C" is +/// returned; otherwise, NULL is returned. 
+/// +static Instruction *CvtFDivConstToReciprocal(Value *Dividend, + ConstantFP *Divisor, + bool AllowReciprocal) { + const APFloat &FpVal = Divisor->getValueAPF(); + APFloat Reciprocal(FpVal.getSemantics()); + bool Cvt = FpVal.getExactInverse(&Reciprocal); + + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { + Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); + (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); + Cvt = !Reciprocal.isDenormal(); + } + + if (!Cvt) + return 0; + + ConstantFP *R; + R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Dividend, R); +} + Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReciprocal = I.hasAllowReciprocal(); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - const APFloat &Op1F = Op1C->getValueAPF(); - - // If the divisor has an exact multiplicative inverse we can turn the fdiv - // into a cheaper fmul. - APFloat Reciprocal(Op1F.getSemantics()); - if (Op1F.getExactInverse(&Reciprocal)) { - ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); - return BinaryOperator::CreateFMul(Op0, RFP); + if (AllowReassociate) { + ConstantFP *C1 = 0; + ConstantFP *C2 = Op1C; + Value *X; + Instruction *Res = 0; + + if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) { + // (X*C1)/C2 => X * (C1/C2) + // + Constant *C = ConstantExpr::getFDiv(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) + Res = BinaryOperator::CreateFMul(X, C); + } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) { + // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed] + // + Constant *C = ConstantExpr::getFMul(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) { + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + AllowReciprocal); + if (!Res) + Res = BinaryOperator::CreateFDiv(X, C); + } + } + + if (Res) { + Res->setFastMathFlags(I.getFastMathFlags()); + return Res; + } + } + + // X / C => X * 1/C + if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) + return T; + + return 0; + } + + if (AllowReassociate && isa<ConstantFP>(Op0)) { + ConstantFP *C1 = cast<ConstantFP>(Op0), *C2; + Constant *Fold = 0; + Value *X; + bool CreateDiv = true; + + // C1 / (X*C2) => (C1/C2) / X + if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2)))) + Fold = ConstantExpr::getFDiv(C1, C2); + else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) { + // C1 / (X/C2) => (C1*C2) / X + Fold = ConstantExpr::getFMul(C1, C2); + } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) { + // C1 / (C2/X) => (C1/C2) * X + Fold = ConstantExpr::getFDiv(C1, C2); + CreateDiv = false; + } + + if (Fold) { + const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); + if (FoldC.isNormal() && !FoldC.isDenormal()) { + Instruction *R = CreateDiv ? 
+ BinaryOperator::CreateFDiv(Fold, X) : + BinaryOperator::CreateFMul(X, Fold); + R->setFastMathFlags(I.getFastMathFlags()); + return R; + } + } + return 0; + } + + if (AllowReassociate) { + Value *X, *Y; + Value *NewInst = 0; + Instruction *SimpR = 0; + + if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { + // (X/Y) / Z => X / (Y*Z) + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) { + NewInst = Builder->CreateFMul(Y, Op1); + SimpR = BinaryOperator::CreateFDiv(X, NewInst); + } + } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) { + // Z / (X/Y) => Z*Y / X + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) { + NewInst = Builder->CreateFMul(Op0, Y); + SimpR = BinaryOperator::CreateFDiv(NewInst, X); + } + } + + if (NewInst) { + if (Instruction *T = dyn_cast<Instruction>(NewInst)) + T->setDebugLoc(I.getDebugLoc()); + SimpR->setFastMathFlags(I.getFastMathFlags()); + return SimpR; } } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dd7ea14..4f71db1 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're @@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } + // Extract a value from a vector add operation with a constant zero. + Value *Val = 0; Constant *Con = 0; + if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return FindScalarElement(Val, EltNo); + } + // Otherwise, we don't know. return 0; } @@ -295,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; } - + if (isa<ConstantAggregateZero>(V)) { Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); return V; } - + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); @@ -595,12 +604,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // ShuffleVectorInst is equivalent to the original one. for (unsigned i = 0; i < VWidth; ++i) { int eltMask; - if (Mask[i] == -1) { + if (Mask[i] < 0) { // This element is an undef value. eltMask = -1; } else if (Mask[i] < (int)LHSWidth) { // This element is from left hand side vector operand. - // + // // If LHS is going to be replaced (case 1, 2, or 4), calculate the // new mask value for the element. if (newLHS != LHS) { @@ -609,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // with a -1 mask value. 
if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) eltMask = -1; - } - else + } else eltMask = Mask[i]; } else { // This element is from right hand side vector operand @@ -630,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { && "should have been check above"); eltMask = -1; } - } - else + } else eltMask = Mask[i]-LHSWidth; // If LHS's width is changed, shift the mask value accordingly. diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 57ed9e3..49efce5 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -19,20 +19,20 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { - + /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; - + void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION; InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION; public: InstCombineWorklist() {} - + bool isEmpty() const { return Worklist.empty(); } - + /// Add - Add the specified instruction to the worklist if it isn't already /// in it. void Add(Instruction *I) { @@ -41,12 +41,12 @@ public: Worklist.push_back(I); } } - + void AddValue(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) Add(I); } - + /// AddInitialGroup - Add the specified batch of stuff in reverse order. /// which should only be done when the worklist is empty and when the group /// has no duplicates. @@ -61,25 +61,25 @@ public: Worklist.push_back(I); } } - + // Remove - remove I from the worklist if it exists. void Remove(Instruction *I) { DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. - + // Don't bother moving everything down, just null out the slot. Worklist[It->second] = 0; - + WorklistMap.erase(It); } - + Instruction *RemoveOne() { Instruction *I = Worklist.back(); Worklist.pop_back(); WorklistMap.erase(I); return I; } - + /// AddUsersToWorkList - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. @@ -89,18 +89,18 @@ public: UI != UE; ++UI) Add(cast<Instruction>(*UI)); } - - + + /// Zap - check that the worklist is empty and nuke the backing store for /// the map if it is large. void Zap() { assert(WorklistMap.empty() && "Worklist empty, but map not?"); - + // Do an explicit clear, this shrinks the map if needed. WorklistMap.clear(); } }; - + } // end namespace llvm. #endif diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 6f24cdd..c6115e3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -162,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { return !Overflow; } +/// Conservatively clears subclassOptionalData after a reassociation or +/// commutation. We preserve fast-math flags when applicable as they can be +/// preserved. 
+static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { + FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I); + if (!FPMO) { + I.clearSubclassOptionalData(); + return; + } + + FastMathFlags FMF = I.getFastMathFlags(); + I.clearSubclassOptionalData(); + I.setFastMathFlags(FMF); +} + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: // @@ -219,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); } else { - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -241,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -263,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -283,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -310,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, Folded); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; continue; @@ -516,8 +531,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -Value *InstCombiner::dyn_castFNegVal(Value *V) const { - if (BinaryOperator::isFNeg(V)) +Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { + if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. 
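Why the clearing has to be conservative in the first place, in one numeric example (plain C++, with i8 arithmetic modeled through widening):

#include <cstdint>
#include <cstdio>

// With a = 100, b = 30, c = -30 in i8, (a+b)+c overflows at the inner add
// while a+(b+c) stays in range, so an nsw flag valid for one grouping need
// not be valid for the other. Only fast-math flags survive reassociation,
// via ClearSubclassDataAfterReassociation above.
int main() {
  int8_t A = 100, B = 30, C = -30;
  int AB = (int)A + (int)B; // 130: outside [-128, 127]
  int BC = (int)B + (int)C; // 0: in range
  std::printf("a+b=%d overflows=%d, b+c=%d overflows=%d\n",
              AB, AB > 127 || AB < -128, BC, BC > 127 || BC < -128);
  return 0;
}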
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 9bd3239..6877475 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,7 +16,6 @@ #define DEBUG_TYPE "asan" #include "llvm/Transforms/Instrumentation.h" -#include "BlackList.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -36,6 +35,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" @@ -43,6 +43,7 @@ #include "llvm/Support/system_error.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> @@ -53,7 +54,8 @@ using namespace llvm; static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; -static const uint64_t kDefaultShadowOffsetAndroid = 0; +static const uint64_t kDefaultShort64bitShadowOffset = 0x7FFF8000; // < 2G. +static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; @@ -63,11 +65,13 @@ static const char *kAsanModuleCtorName = "asan.module_ctor"; static const char *kAsanModuleDtorName = "asan.module_dtor"; static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; +static const char *kAsanReportLoadN = "__asan_report_load_n"; +static const char *kAsanReportStoreN = "__asan_report_store_n"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init"; +static const char *kAsanInitName = "__asan_init_v1"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -133,6 +137,9 @@ static cl::opt<int> ClMappingScale("asan-mapping-scale", cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log", cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1)); +static cl::opt<bool> ClShort64BitOffset("asan-short-64bit-mapping-offset", + cl::desc("Use short immediate constant as the mapping offset for 64bit"), + cl::Hidden, cl::init(true)); // Optimization flags. Not user visible, used mostly for testing // and benchmarking the tool. @@ -186,14 +193,53 @@ class SetOfDynamicallyInitializedGlobals { SmallSet<GlobalValue*, 32> DynInitGlobals; }; -static int MappingScale() { - return ClMappingScale ? ClMappingScale : kDefaultShadowScale; +/// This struct defines the shadow mapping using the rule: +/// shadow = (mem >> Scale) ADD-or-OR Offset. 
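Worked numbers for that rule, using constants this patch introduces (the default Scale of 3 maps 8 application bytes to 1 shadow byte; the address below is just an example):

#include <cstdint>
#include <cstdio>

// shadow = (mem >> Scale) ADD-or-OR Offset, with kDefaultShadowScale = 3
// and kDefaultShort64bitShadowOffset = 0x7FFF8000 from this patch.
int main() {
  const int Scale = 3;
  const uint64_t Offset = 0x7FFF8000ULL;
  uint64_t Mem = 0x602000000010ULL; // arbitrary example address
  std::printf("add form: 0x%llx\n",
              (unsigned long long)((Mem >> Scale) + Offset));
  std::printf("or form:  0x%llx\n",
              (unsigned long long)((Mem >> Scale) | Offset));
  return 0;
}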
+struct ShadowMapping { + int Scale; + uint64_t Offset; + bool OrShadowOffset; +}; + +static ShadowMapping getShadowMapping(const Module &M, int LongSize, + bool ZeroBaseShadow) { + llvm::Triple TargetTriple(M.getTargetTriple()); + bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android; + bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX; + bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64; + bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; + + ShadowMapping Mapping; + + // OR-ing the shadow offset is more efficient (at least on x86), + // but on ppc64 we have to use add, since the shadow offset is not necessarily + // 1/8-th of the address space. + Mapping.OrShadowOffset = !IsPPC64 && !ClShort64BitOffset; + + Mapping.Offset = (IsAndroid || ZeroBaseShadow) ? 0 : + (LongSize == 32 ? kDefaultShadowOffset32 : + IsPPC64 ? kPPC64_ShadowOffset64 : kDefaultShadowOffset64); + if (!ZeroBaseShadow && ClShort64BitOffset && IsX86_64 && !IsMacOSX) { + assert(LongSize == 64); + Mapping.Offset = kDefaultShort64bitShadowOffset; + } + if (!ZeroBaseShadow && ClMappingOffsetLog >= 0) { + // Zero offset log is the special case. + Mapping.Offset = (ClMappingOffsetLog == 0) ? 0 : 1ULL << ClMappingOffsetLog; + } + + Mapping.Scale = kDefaultShadowScale; + if (ClMappingScale) { + Mapping.Scale = ClMappingScale; + } + + return Mapping; } -static size_t RedzoneSize() { +static size_t RedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. - return std::max(32U, 1U << MappingScale()); + return std::max(32U, 1U << MappingScale); } /// AddressSanitizer: instrument the code in module to find memory bugs. @@ -201,23 +247,27 @@ struct AddressSanitizer : public FunctionPass { AddressSanitizer(bool CheckInitOrder = false, bool CheckUseAfterReturn = false, bool CheckLifetime = false, - StringRef BlacklistFile = StringRef()) + StringRef BlacklistFile = StringRef(), + bool ZeroBaseShadow = false) : FunctionPass(ID), CheckInitOrder(CheckInitOrder || ClInitializers), CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn), CheckLifetime(CheckLifetime || ClCheckLifetime), BlacklistFile(BlacklistFile.empty() ?
ClBlacklistFile - : BlacklistFile) {} + : BlacklistFile), + ZeroBaseShadow(ZeroBaseShadow) {} virtual const char *getPassName() const { return "AddressSanitizerFunctionPass"; } void instrumentMop(Instruction *I); - void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, - Value *Addr, uint32_t TypeSize, bool IsWrite); + void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, + Value *Addr, uint32_t TypeSize, bool IsWrite, + Value *SizeArgument); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex); + bool IsWrite, size_t AccessSizeIndex, + Value *SizeArgument); bool instrumentMemIntrinsic(MemIntrinsic *MI); void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, Value *Size, @@ -227,6 +277,7 @@ struct AddressSanitizer : public FunctionPass { void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); + void emitShadowMapping(Module &M, IRBuilder<> &IRB) const; virtual bool doInitialization(Module &M); static char ID; // Pass identification, replacement for typeid @@ -240,18 +291,22 @@ struct AddressSanitizer : public FunctionPass { bool CheckInitOrder; bool CheckUseAfterReturn; bool CheckLifetime; + SmallString<64> BlacklistFile; + bool ZeroBaseShadow; + LLVMContext *C; DataLayout *TD; - uint64_t MappingOffset; int LongSize; Type *IntptrTy; + ShadowMapping Mapping; Function *AsanCtorFunction; Function *AsanInitFunction; Function *AsanHandleNoReturnFunc; - SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; + // This array is indexed by AccessIsWrite. + Function *AsanErrorCallbackSized[2]; InlineAsm *EmptyAsm; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; @@ -261,11 +316,13 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: AddressSanitizerModule(bool CheckInitOrder = false, - StringRef BlacklistFile = StringRef()) + StringRef BlacklistFile = StringRef(), + bool ZeroBaseShadow = false) : ModulePass(ID), CheckInitOrder(CheckInitOrder || ClInitializers), BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile - : BlacklistFile) {} + : BlacklistFile), + ZeroBaseShadow(ZeroBaseShadow) {} bool runOnModule(Module &M); static char ID; // Pass identification, replacement for typeid virtual const char *getPassName() const { @@ -278,14 +335,20 @@ class AddressSanitizerModule : public ModulePass { bool ShouldInstrumentGlobal(GlobalVariable *G); void createInitializerPoisonCalls(Module &M, Value *FirstAddr, Value *LastAddr); + size_t RedzoneSize() const { + return RedzoneSizeForScale(Mapping.Scale); + } bool CheckInitOrder; SmallString<64> BlacklistFile; + bool ZeroBaseShadow; + OwningPtr<BlackList> BL; SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; Type *IntptrTy; LLVMContext *C; DataLayout *TD; + ShadowMapping Mapping; Function *AsanPoisonGlobals; Function *AsanUnpoisonGlobals; Function *AsanRegisterGlobals; @@ -308,6 +371,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { LLVMContext *C; Type *IntptrTy; Type *IntptrPtrTy; + ShadowMapping Mapping; SmallVector<AllocaInst*, 16> AllocaVec; SmallVector<Instruction*, 8> RetVec; @@ -332,7 +396,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) : F(F), ASan(ASan), DIB(*F.getParent()), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), - TotalStackSize(0), StackAlignment(1 << MappingScale()) {} + Mapping(ASan.Mapping), + TotalStackSize(0), StackAlignment(1 << Mapping.Scale) {} bool runOnFunction() { if (!ClStack) return false; @@ -411,6 +476,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { AI.getAllocatedType()->isSized()); } + size_t RedzoneSize() const { + return RedzoneSizeForScale(Mapping.Scale); + } uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = ASan.TD->getTypeAllocSize(Ty); @@ -439,9 +507,9 @@ INITIALIZE_PASS(AddressSanitizer, "asan", false, false) FunctionPass *llvm::createAddressSanitizerFunctionPass( bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime, - StringRef BlacklistFile) { + StringRef BlacklistFile, bool ZeroBaseShadow) { return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn, - CheckLifetime, BlacklistFile); + CheckLifetime, BlacklistFile, ZeroBaseShadow); } char AddressSanitizerModule::ID = 0; @@ -449,8 +517,9 @@ INITIALIZE_PASS(AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass( - bool CheckInitOrder, StringRef BlacklistFile) { - return new AddressSanitizerModule(CheckInitOrder, BlacklistFile); + bool CheckInitOrder, StringRef BlacklistFile, bool ZeroBaseShadow) { + return new AddressSanitizerModule(CheckInitOrder, BlacklistFile, + ZeroBaseShadow); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -473,32 +542,30 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { // Shadow >> scale - Shadow = IRB.CreateLShr(Shadow, MappingScale()); - if (MappingOffset == 0) + Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); + if (Mapping.Offset == 0) return Shadow; // (Shadow >> scale) | offset - return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, - MappingOffset)); + if (Mapping.OrShadowOffset) + return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); + else + return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset)); } void AddressSanitizer::instrumentMemIntrinsicParam( Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { + IRBuilder<> IRB(InsertBefore); + if (Size->getType() != IntptrTy) + Size = IRB.CreateIntCast(Size, IntptrTy, false); // Check the first byte. - { - IRBuilder<> IRB(InsertBefore); - instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); - } + instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size); // Check the last byte. - { - IRBuilder<> IRB(InsertBefore); - Value *SizeMinusOne = IRB.CreateSub( - Size, ConstantInt::get(Size->getType(), 1)); - SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); - Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); - } + IRB.SetInsertPoint(InsertBefore); + Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1)); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne); + instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size); } // Instrument memset/memmove/memcpy @@ -577,14 +644,24 @@ void AddressSanitizer::instrumentMop(Instruction *I) { assert(OrigTy->isSized()); uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); - if (TypeSize != 8 && TypeSize != 16 && - TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { - // Ignore all unusual sizes. - return; - } + assert((TypeSize % 8) == 0); + // Instrument a 1-, 2-, 4-, 8-, or 16-byte access with one check. + if (TypeSize == 8 || TypeSize == 16 || + TypeSize == 32 || TypeSize == 64 || TypeSize == 128) + return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0); + // Instrument unusual size (but still multiple of 8). + // We cannot do it with a single check, so we do a 1-byte check for the first + // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able + // to report the actual access size.
IRBuilder<> IRB(I); - instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); + Value *LastByte = IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy), + ConstantInt::get(IntptrTy, TypeSize / 8 - 1)), + OrigPtrTy); + Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8); + instrumentAddress(I, I, Addr, 8, IsWrite, Size); + instrumentAddress(I, I, LastByte, 8, IsWrite, Size); } // Validate the result of Module::getOrInsertFunction called for an interface @@ -600,10 +677,12 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { Instruction *AddressSanitizer::generateCrashCode( Instruction *InsertBefore, Value *Addr, - bool IsWrite, size_t AccessSizeIndex) { + bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) { IRBuilder<> IRB(InsertBefore); - CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], - Addr); + CallInst *Call = SizeArgument + ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument) + : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. @@ -614,7 +693,7 @@ Instruction *AddressSanitizer::generateCrashCode( Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize) { - size_t Granularity = 1 << MappingScale(); + size_t Granularity = 1 << Mapping.Scale; // Addr & (Granularity - 1) Value *LastAccessedByte = IRB.CreateAnd( AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); @@ -630,12 +709,14 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, } void AddressSanitizer::instrumentAddress(Instruction *OrigIns, - IRBuilder<> &IRB, Value *Addr, - uint32_t TypeSize, bool IsWrite) { + Instruction *InsertBefore, + Value *Addr, uint32_t TypeSize, + bool IsWrite, Value *SizeArgument) { + IRBuilder<> IRB(InsertBefore); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Type *ShadowTy = IntegerType::get( - *C, std::max(8U, TypeSize >> MappingScale())); + *C, std::max(8U, TypeSize >> Mapping.Scale)); Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); Value *ShadowPtr = memToShadow(AddrLong, IRB); Value *CmpVal = Constant::getNullValue(ShadowTy); @@ -644,7 +725,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - size_t Granularity = 1 << MappingScale(); + size_t Granularity = 1 << Mapping.Scale; TerminatorInst *CrashTerm = 0; if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { @@ -663,8 +744,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true); } - Instruction *Crash = - generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); + Instruction *Crash = generateCrashCode( + CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument); Crash->setDebugLoc(OrigIns->getDebugLoc()); } @@ -782,7 +863,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { BL.reset(new BlackList(BlacklistFile)); if (BL->isIn(M)) return false; C = &(M.getContext()); - IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits()); + int LongSize = TD->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow); initializeCallbacks(M); DynamicallyInitializedGlobals.Init(M); @@ -819,12 +902,22 @@ bool 
AddressSanitizerModule::runOnModule(Module &M) { Value *FirstDynamic = 0, *LastDynamic = 0; for (size_t i = 0; i < n; i++) { + static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); Type *Ty = PtrTy->getElementType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); - size_t RZ = RedzoneSize(); - uint64_t RightRedzoneSize = RZ + (RZ - (SizeInBytes % RZ)); + uint64_t MinRZ = RedzoneSize(); + // MinRZ <= RZ <= kMaxGlobalRedzone + // and trying to make RZ to be ~ 1/4 of SizeInBytes. + uint64_t RZ = std::max(MinRZ, + std::min(kMaxGlobalRedzone, + (SizeInBytes / MinRZ / 4) * MinRZ)); + uint64_t RightRedzoneSize = RZ; + // Round up to MinRZ + if (SizeInBytes % MinRZ) + RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); + assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); // Determine whether this global should be poisoned in initialization. bool GlobalHasDynamicInitializer = @@ -848,7 +941,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { M, NewTy, G->isConstant(), G->getLinkage(), NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); - NewGlobal->setAlignment(RZ); + NewGlobal->setAlignment(MinRZ); Value *Indices2[2]; Indices2[0] = IRB.getInt32(0); @@ -921,6 +1014,10 @@ void AddressSanitizer::initializeCallbacks(Module &M) { FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } + AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction( + kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction( kAsanHandleNoReturnName, IRB.getVoidTy(), NULL)); @@ -930,6 +1027,23 @@ void AddressSanitizer::initializeCallbacks(Module &M) { /*hasSideEffects=*/true); } +void AddressSanitizer::emitShadowMapping(Module &M, IRBuilder<> &IRB) const { + // Tell the values of mapping offset and scale to the run-time. + GlobalValue *asan_mapping_offset = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Offset), + kAsanMappingOffsetName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_offset, true); + + GlobalValue *asan_mapping_scale = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, Mapping.Scale), + kAsanMappingScaleName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_scale, true); +} + // virtual bool AddressSanitizer::doInitialization(Module &M) { // Initialize the private fields. No one has accessed them before. @@ -955,41 +1069,10 @@ bool AddressSanitizer::doInitialization(Module &M) { AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); - llvm::Triple targetTriple(M.getTargetTriple()); - bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android; - - MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid : - (LongSize == 32 ? 
kDefaultShadowOffset32 : kDefaultShadowOffset64); - if (ClMappingOffsetLog >= 0) { - if (ClMappingOffsetLog == 0) { - // special case - MappingOffset = 0; - } else { - MappingOffset = 1ULL << ClMappingOffsetLog; - } - } - - - if (ClMappingOffsetLog >= 0) { - // Tell the run-time the current values of mapping offset and scale. - GlobalValue *asan_mapping_offset = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingOffset), - kAsanMappingOffsetName); - // Read the global, otherwise it may be optimized away. - IRB.CreateLoad(asan_mapping_offset, true); - } - if (ClMappingScale) { - GlobalValue *asan_mapping_scale = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingScale()), - kAsanMappingScaleName); - // Read the global, otherwise it may be optimized away. - IRB.CreateLoad(asan_mapping_scale, true); - } + Mapping = getShadowMapping(M, LongSize, ZeroBaseShadow); + emitShadowMapping(M, IRB); appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); - return true; } @@ -1015,11 +1098,11 @@ bool AddressSanitizer::runOnFunction(Function &F) { DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); - // If needed, insert __asan_init before checking for AddressSafety attr. + // If needed, insert __asan_init before checking for SanitizeAddress attr. maybeInsertAsanInitAtFunctionEntry(F); if (!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AddressSafety)) + Attribute::SanitizeAddress)) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) @@ -1048,12 +1131,12 @@ bool AddressSanitizer::runOnFunction(Function &F) { } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) { // ok, take it. } else { - if (CallInst *CI = dyn_cast<CallInst>(BI)) { + CallSite CS(BI); + if (CS) { // A call inside BB. TempsToInstrument.clear(); - if (CI->doesNotReturn()) { - NoReturnCalls.push_back(CI); - } + if (CS.doesNotReturn()) + NoReturnCalls.push_back(CS.getInstruction()); } continue; } @@ -1147,7 +1230,7 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { void FunctionStackPoisoner::poisonRedZones( const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison) { - size_t ShadowRZSize = RedzoneSize() >> MappingScale(); + size_t ShadowRZSize = RedzoneSize() >> Mapping.Scale; assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8); Type *RZPtrTy = PointerType::get(RZTy, 0); @@ -1178,13 +1261,13 @@ void FunctionStackPoisoner::poisonRedZones( // Poison the partial redzone at right Ptr = IRB.CreateAdd( ShadowBase, ConstantInt::get(IntptrTy, - (Pos >> MappingScale()) - ShadowRZSize)); + (Pos >> Mapping.Scale) - ShadowRZSize)); size_t AddressableBytes = RedzoneSize() - (AlignedSize - SizeInBytes); uint32_t Poison = 0; if (DoPoison) { PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes, RedzoneSize(), - 1ULL << MappingScale(), + 1ULL << Mapping.Scale, kAsanStackPartialRedzoneMagic); } Value *PartialPoison = ConstantInt::get(RZTy, Poison); @@ -1193,7 +1276,7 @@ void FunctionStackPoisoner::poisonRedZones( // Poison the full redzone at right. Ptr = IRB.CreateAdd(ShadowBase, - ConstantInt::get(IntptrTy, Pos >> MappingScale())); + ConstantInt::get(IntptrTy, Pos >> Mapping.Scale)); bool LastAlloca = (i == AllocaVec.size() - 1); Value *Poison = LastAlloca ? 
PoisonRight : PoisonMid; IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp index 4fcbea4..927982d 100644 --- a/lib/Transforms/Instrumentation/BlackList.cpp +++ b/lib/Transforms/Instrumentation/BlackList.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "BlackList.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -78,21 +78,21 @@ BlackList::BlackList(const StringRef Path) { } // Iterate through each of the prefixes, and create Regexs for them. - for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end(); - I != E; ++I) { + for (StringMap<std::string>::const_iterator I = Regexps.begin(), + E = Regexps.end(); I != E; ++I) { Entries[I->getKey()] = new Regex(I->getValue()); } } -bool BlackList::isIn(const Function &F) { +bool BlackList::isIn(const Function &F) const { return isIn(*F.getParent()) || inSection("fun", F.getName()); } -bool BlackList::isIn(const GlobalVariable &G) { +bool BlackList::isIn(const GlobalVariable &G) const { return isIn(*G.getParent()) || inSection("global", G.getName()); } -bool BlackList::isIn(const Module &M) { +bool BlackList::isIn(const Module &M) const { return inSection("src", M.getModuleIdentifier()); } @@ -107,14 +107,15 @@ static StringRef GetGVTypeString(const GlobalVariable &G) { return "<unknown type>"; } -bool BlackList::isInInit(const GlobalVariable &G) { +bool BlackList::isInInit(const GlobalVariable &G) const { return (isIn(*G.getParent()) || inSection("global-init", G.getName()) || inSection("global-init-type", GetGVTypeString(G))); } -bool BlackList::inSection(const StringRef Section, const StringRef Query) { - StringMap<Regex*>::iterator I = Entries.find(Section); +bool BlackList::inSection(const StringRef Section, + const StringRef Query) const { + StringMap<Regex*>::const_iterator I = Entries.find(Section); if (I == Entries.end()) return false; Regex *FunctionRegex = I->getValue(); diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h deleted file mode 100644 index ee18a98..0000000 --- a/lib/Transforms/Instrumentation/BlackList.h +++ /dev/null @@ -1,58 +0,0 @@ -//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// -// -// This is a utility class for instrumentation passes (like AddressSanitizer -// or ThreadSanitizer) to avoid instrumenting some functions or global -// variables based on a user-supplied blacklist. -// -// The blacklist disables instrumentation of various functions and global -// variables. Each line contains a prefix, followed by a wild card expression. -// Empty lines and lines starting with "#" are ignored. -// --- -// # Blacklisted items: -// fun:*_ZN4base6subtle* -// global:*global_with_bad_access_or_initialization* -// global-init:*global_with_initialization_issues* -// global-init-type:*Namespace::ClassName* -// src:file_with_tricky_code.cc -// --- -// Note that the wild card is in fact an llvm::Regex, but * is automatically -// replaced with .* -// This is similar to the "ignore" feature of ThreadSanitizer. 
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -// -//===----------------------------------------------------------------------===// -// - -#include "llvm/ADT/StringMap.h" - -namespace llvm { -class Function; -class GlobalVariable; -class Module; -class Regex; -class StringRef; - -class BlackList { - public: - BlackList(const StringRef Path); - // Returns whether either this function or it's source file are blacklisted. - bool isIn(const Function &F); - // Returns whether either this global or it's source file are blacklisted. - bool isIn(const GlobalVariable &G); - // Returns whether this module is blacklisted by filename. - bool isIn(const Module &M); - // Returns whether a global should be excluded from initialization checking. - bool isInInit(const GlobalVariable &G); - private: - StringMap<Regex*> Entries; - - bool inSection(const StringRef Section, const StringRef Query); -}; - -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 0b18b4c..a2459fb 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -21,7 +21,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "ProfilingUtils.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" @@ -55,8 +54,8 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } bool EdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - M.getContext().emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! 
} diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 58d5801..80705af 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -71,7 +71,6 @@ #define DEBUG_TYPE "msan" #include "llvm/Transforms/Instrumentation.h" -#include "BlackList.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -91,6 +90,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -127,6 +127,10 @@ static cl::opt<bool> ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClHandleICmpExact("msan-handle-icmp-exact", + cl::desc("exact handling of relational integer ICmp"), + cl::Hidden, cl::init(false)); + static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin", cl::desc("store origin for clean (fully initialized) values"), cl::Hidden, cl::init(false)); @@ -361,6 +365,9 @@ bool MemorySanitizer::doInitialization(Module &M) { new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, IRB.getInt32(TrackOrigins), "__msan_track_origins"); + new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage, + IRB.getInt32(ClKeepGoing), "__msan_keep_going"); + return true; } @@ -451,9 +458,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - // If the store is volatile, add a check. - if (I.isVolatile()) - insertCheck(Val, &I); + if (ClCheckAccessAddress) insertCheck(Addr, &I); @@ -574,7 +579,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy)) return IT; if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) { - uint32_t EltSize = MS.TD->getTypeStoreSizeInBits(VT->getElementType()); + uint32_t EltSize = MS.TD->getTypeSizeInBits(VT->getElementType()); return VectorType::get(IntegerType::get(*MS.C, EltSize), VT->getNumElements()); } @@ -586,7 +591,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n"); return Res; } - uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy); + uint32_t TypeSize = MS.TD->getTypeSizeInBits(OrigTy); return IntegerType::get(*MS.C, TypeSize); } @@ -847,7 +852,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Stores the corresponding shadow and (optionally) origin. /// Optionally, checks that the store address is fully defined. - /// Volatile stores check that the value being stored is fully defined. void visitStoreInst(StoreInst &I) { StoreList.push_back(&I); } @@ -1127,10 +1131,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *B = I.getOperand(1); Value *Sa = getShadow(A); Value *Sb = getShadow(B); - if (A->getType()->isPointerTy()) - A = IRB.CreatePointerCast(A, MS.IntptrTy); - if (B->getType()->isPointerTy()) - B = IRB.CreatePointerCast(B, MS.IntptrTy); + + // Get rid of pointers and vectors of pointers. 
+ // For ints (and vectors of ints), types of A and Sa match, + // and this is a no-op. + A = IRB.CreatePointerCast(A, Sa->getType()); + B = IRB.CreatePointerCast(B, Sb->getType()); + // A == B <==> (C = A^B) == 0 // A != B <==> (C = A^B) != 0 // Sc = Sa | Sb @@ -1152,6 +1159,73 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + /// \brief Build the lowest possible value of V, taking into account V's + /// uninitialized bits. + Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, + bool isSigned) { + if (isSigned) { + // Split shadow into sign bit and other bits. + Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); + Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); + // Maximize the undefined shadow bit, minimize other undefined bits. + return + IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)), SaSignBit); + } else { + // Minimize undefined bits. + return IRB.CreateAnd(A, IRB.CreateNot(Sa)); + } + } + + /// \brief Build the highest possible value of V, taking into account V's + /// uninitialized bits. + Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa, + bool isSigned) { + if (isSigned) { + // Split shadow into sign bit and other bits. + Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1); + Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits); + // Minimize the undefined shadow bit, maximize other undefined bits. + return + IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)), SaOtherBits); + } else { + // Maximize undefined bits. + return IRB.CreateOr(A, Sa); + } + } + + /// \brief Instrument relational comparisons. + /// + /// This function does exact shadow propagation for all relational + /// comparisons of integers, pointers and vectors of those. + /// FIXME: output seems suboptimal when one of the operands is a constant + void handleRelationalComparisonExact(ICmpInst &I) { + IRBuilder<> IRB(&I); + Value *A = I.getOperand(0); + Value *B = I.getOperand(1); + Value *Sa = getShadow(A); + Value *Sb = getShadow(B); + + // Get rid of pointers and vectors of pointers. + // For ints (and vectors of ints), types of A and Sa match, + // and this is a no-op. + A = IRB.CreatePointerCast(A, Sa->getType()); + B = IRB.CreatePointerCast(B, Sb->getType()); + + // Let [a0, a1] be the interval of possible values of A, taking into account + // its undefined bits. Let [b0, b1] be the interval of possible values of B. + // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0). + bool IsSigned = I.isSigned(); + Value *S1 = IRB.CreateICmp(I.getPredicate(), + getLowestPossibleValue(IRB, A, Sa, IsSigned), + getHighestPossibleValue(IRB, B, Sb, IsSigned)); + Value *S2 = IRB.CreateICmp(I.getPredicate(), + getHighestPossibleValue(IRB, A, Sa, IsSigned), + getLowestPossibleValue(IRB, B, Sb, IsSigned)); + Value *Si = IRB.CreateXor(S1, S2); + setShadow(&I, Si); + setOriginForNaryOp(I); + } + /// \brief Instrument signed relational comparisons.
/// /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by @@ -1181,12 +1255,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void visitICmpInst(ICmpInst &I) { - if (ClHandleICmp && I.isEquality()) + if (!ClHandleICmp) { + handleShadowOr(I); + return; + } + if (I.isEquality()) { handleEqualityComparison(I); - else if (ClHandleICmp && I.isSigned() && I.isRelational()) + return; + } + + assert(I.isRelational()); + if (ClHandleICmpExact) { + handleRelationalComparisonExact(I); + return; + } + if (I.isSigned()) { handleSignedRelationalComparison(I); - else - handleShadowOr(I); + return; + } + + assert(I.isUnsigned()); + if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) { + handleRelationalComparisonExact(I); + return; + } + + handleShadowOr(I); } void visitFCmpInst(FCmpInst &I) { @@ -1458,8 +1552,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttribute(AttributeSet::FunctionIndex, - Attribute::get(Func->getContext(), B)); + Func->removeAttributes(AttributeSet::FunctionIndex, + AttributeSet::get(Func->getContext(), + AttributeSet::FunctionIndex, + B)); } } IRBuilder<> IRB(&I); @@ -1498,6 +1594,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (MS.TrackOrigins) IRB.CreateStore(getOrigin(A), getOriginPtrForArgument(A, IRB, ArgOffset)); + (void)Store; assert(Size != 0 && Store != 0); DEBUG(dbgs() << " Param:" << *Store << "\n"); ArgOffset += DataLayout::RoundUpAlignment(Size, 8); @@ -1774,7 +1871,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */24, /* alignment */16, false); + /* size */24, /* alignment */8, false); } void visitVACopyInst(VACopyInst &I) { @@ -1785,7 +1882,7 @@ struct VarArgAMD64Helper : public VarArgHelper { // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. 
IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */ 24, /* alignment */ 16, false); + /* size */24, /* alignment */8, false); } void finalizeInstrumentation() { @@ -1850,8 +1947,9 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttribute(AttributeSet::FunctionIndex, - Attribute::get(F.getContext(), B)); + F.removeAttributes(AttributeSet::FunctionIndex, + AttributeSet::get(F.getContext(), + AttributeSet::FunctionIndex, B)); return Visitor.runOnFunction(); } diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index c5a1fe9..b45aef6 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -76,8 +75,8 @@ inline static void printEdgeCounter(ProfileInfo::Edge e, bool OptimalEdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - M.getContext().emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 358bbeb..7de7326 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -1345,8 +1345,8 @@ bool PathProfiler::runOnModule(Module &M) { Main = M.getFunction("MAIN__"); if (!Main) { - Context->emitWarning("cannot insert edge profiling into a module" - " with no main function"); + errs() << "WARNING: cannot insert path profiling into a module" + << " with no main function!\n"; return false; } diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 29d2ece..f93c5ab 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,7 +22,6 @@ #define DEBUG_TYPE "tsan" #include "llvm/Transforms/Instrumentation.h" -#include "BlackList.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -41,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BlackList.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index f7bca06..15e9fba 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize +subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC [component_0] type = Group diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 8b1df92..c390517 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,7 @@ 
##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt new file mode 100644 index 0000000..233deb3 --- /dev/null +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMObjCARCOpts + ObjCARC.cpp + ObjCARCOpts.cpp + ObjCARCExpand.cpp + ObjCARCAPElim.cpp + ObjCARCAliasAnalysis.cpp + ObjCARCUtil.cpp + ObjCARCContract.cpp + DependencyAnalysis.cpp + ProvenanceAnalysis.cpp + ) + +add_dependencies(LLVMObjCARCOpts intrinsics_gen) diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp new file mode 100644 index 0000000..5aada9c --- /dev/null +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -0,0 +1,261 @@ +//===- DependencyAnalysis.cpp - ObjC ARC Optimization ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines special dependency analysis routines used in Objective C +/// ARC Optimizations. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-dependency" +#include "ObjCARC.h" +#include "DependencyAnalysis.h" +#include "ProvenanceAnalysis.h" +#include "llvm/Support/CFG.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// Test whether the given instruction can result in a reference count +/// modification (positive or negative) for the pointer's object. +bool +llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, + InstructionClass Class) { + switch (Class) { + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_User: + // These operations never directly modify a reference count. + return false; + default: break; + } + + ImmutableCallSite CS = static_cast<const Value *>(Inst); + assert(CS && "Only calls can alter reference counts!"); + + // See if AliasAnalysis can help us with the call. + AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + if (AliasAnalysis::onlyReadsMemory(MRB)) + return false; + if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + const Value *Op = *I; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } + + // Assume the worst. + return true; +} + +/// Test whether the given instruction can "use" the given pointer's object in a +/// way that requires the reference count to be positive. 
+bool +llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class) { + // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. + if (Class == IC_Call) + return false; + + // Consider various instructions which may have pointer arguments which are + // not "uses". + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { + // Comparing a pointer with null, or any other constant, isn't really a use, + // because we don't care what the pointer points to, or about the values + // of any other dynamic reference-counted pointers. + if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) + return false; + } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + // For calls, just check the arguments (and not the callee operand). + for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), + OE = CS.arg_end(); OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; + } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Special-case stores, because we don't care about the stored value, just + // the store address. + const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); + // If we can't tell what the underlying object was, assume there is a + // dependence. + return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr); + } + + // Check each operand for a match. + for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) { + const Value *Op = *OI; + if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op)) + return true; + } + return false; +} + +/// Test if there can be dependencies on Inst through Arg. This function only +/// tests dependencies relevant for removing pairs of calls. +bool +llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst, + const Value *Arg, ProvenanceAnalysis &PA) { + // If we've reached the definition of Arg, stop. + if (Inst == Arg) + return true; + + switch (Flavor) { + case NeedsPositiveRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanUse(Inst, Arg, PA, Class); + } + } + + case AutoreleasePoolBoundary: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // These mark the end and begin of an autorelease pool scope. + return true; + default: + // Nothing else does this. + return false; + } + } + + case CanChangeRetainCount: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + // Conservatively assume this can decrement any count. + return true; + case IC_AutoreleasepoolPush: + case IC_None: + return false; + default: + return CanAlterRefCount(Inst, Arg, PA, Class); + } + } + + case RetainAutoreleaseDep: + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // Don't merge an objc_autorelease with an objc_retain inside a different + // autoreleasepool scope. + return true; + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Nothing else matters for objc_retainAutorelease formation. 
+ return false; + } + + case RetainAutoreleaseRVDep: { + InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_RetainRV: + // Check for a retain of the same pointer for merging. + return GetObjCArg(Inst) == Arg; + default: + // Anything that can autorelease interrupts + // retainAutoreleaseReturnValue formation. + return CanInterruptRV(Class); + } + } + + case RetainRVDep: + return CanInterruptRV(GetBasicInstructionClass(Inst)); + } + + llvm_unreachable("Invalid dependence flavor"); +} + +/// Walk up the CFG from StartPos (which is in StartBB) and find local and +/// non-local dependencies on Arg. +/// +/// TODO: Cache results? +void +llvm::objcarc::FindDependencies(DependenceKind Flavor, + const Value *Arg, + BasicBlock *StartBB, Instruction *StartInst, + SmallPtrSet<Instruction *, 4> &DependingInsts, + SmallPtrSet<const BasicBlock *, 4> &Visited, + ProvenanceAnalysis &PA) { + BasicBlock::iterator StartPos = StartInst; + + SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist; + Worklist.push_back(std::make_pair(StartBB, StartPos)); + do { + std::pair<BasicBlock *, BasicBlock::iterator> Pair = + Worklist.pop_back_val(); + BasicBlock *LocalStartBB = Pair.first; + BasicBlock::iterator LocalStartPos = Pair.second; + BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); + for (;;) { + if (LocalStartPos == StartBBBegin) { + pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); + if (PI == PE) + // If we've reached the function entry, produce a null dependence. + DependingInsts.insert(0); + else + // Add the predecessors to the worklist. + do { + BasicBlock *PredBB = *PI; + if (Visited.insert(PredBB)) + Worklist.push_back(std::make_pair(PredBB, PredBB->end())); + } while (++PI != PE); + break; + } + + Instruction *Inst = --LocalStartPos; + if (Depends(Flavor, Inst, Arg, PA)) { + DependingInsts.insert(Inst); + break; + } + } + } while (!Worklist.empty()); + + // Determine whether the original StartBB post-dominates all of the blocks we + // visited. If not, insert a sentinel indicating that most optimizations are + // not safe. + for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(), + E = Visited.end(); I != E; ++I) { + const BasicBlock *BB = *I; + if (BB == StartBB) + continue; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { + const BasicBlock *Succ = *SI; + if (Succ != StartBB && !Visited.count(Succ)) { + DependingInsts.insert(reinterpret_cast<Instruction *>(-1)); + return; + } + } + } +} diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.h b/lib/Transforms/ObjCARC/DependencyAnalysis.h new file mode 100644 index 0000000..24d358b --- /dev/null +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.h @@ -0,0 +1,79 @@ +//===- DependencyAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file declares special dependency analysis routines used in Objective C +/// ARC Optimizations. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used.
Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H +#define LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H + +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + class BasicBlock; + class Instruction; + class Value; +} + +namespace llvm { +namespace objcarc { + +class ProvenanceAnalysis; + +/// \enum DependenceKind +/// \brief Defines different dependence kinds among various ARC constructs. +/// +/// There are several kinds of dependence-like concepts in use here. +/// +enum DependenceKind { + NeedsPositiveRetainCount, + AutoreleasePoolBoundary, + CanChangeRetainCount, + RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. + RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. + RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. +}; + +void FindDependencies(DependenceKind Flavor, + const Value *Arg, + BasicBlock *StartBB, Instruction *StartInst, + SmallPtrSet<Instruction *, 4> &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> &Visited, + ProvenanceAnalysis &PA); + +bool +Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, + ProvenanceAnalysis &PA); + +/// Test whether the given instruction can "use" the given pointer's object in a +/// way that requires the reference count to be positive. +bool +CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, + InstructionClass Class); + +/// Test whether the given instruction can result in a reference count +/// modification (positive or negative) for the pointer's object. +bool +CanAlterRefCount(const Instruction *Inst, const Value *Ptr, + ProvenanceAnalysis &PA, InstructionClass Class); + +} // namespace objcarc +} // namespace llvm + +#endif // LLVM_TRANSFORMS_OBJCARC_DEPEDENCYANALYSIS_H diff --git a/lib/Transforms/ObjCARC/LLVMBuild.txt b/lib/Transforms/ObjCARC/LLVMBuild.txt new file mode 100644 index 0000000..90a2338 --- /dev/null +++ b/lib/Transforms/ObjCARC/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/ObjCARC/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ObjCARC +parent = Transforms +library_name = ObjCARCOpts +required_libraries = Analysis Core Support TransformUtils diff --git a/lib/Transforms/ObjCARC/Makefile b/lib/Transforms/ObjCARC/Makefile new file mode 100644 index 0000000..2a34e21 --- /dev/null +++ b/lib/Transforms/ObjCARC/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/ObjCARC/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMObjCARCOpts +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp new file mode 100644 index 0000000..53a31b0 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -0,0 +1,48 @@ +//===-- ObjCARC.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "ObjCARC.h" +#include "llvm-c/Core.h" +#include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + class PassRegistry; +} + +using namespace llvm; +using namespace llvm::objcarc; + +/// \brief A handy option to enable/disable all ARC Optimizations. +bool llvm::objcarc::EnableARCOpts; +static cl::opt<bool, true> +EnableARCOptimizations("enable-objc-arc-opts", + cl::location(EnableARCOpts), + cl::init(true)); + +/// initializeObjCARCOptsPasses - Initialize all passes linked into the +/// ObjCARCOpts library. +void llvm::initializeObjCARCOpts(PassRegistry &Registry) { + initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCAPElimPass(Registry); + initializeObjCARCExpandPass(Registry); + initializeObjCARCContractPass(Registry); + initializeObjCARCOptPass(Registry); +} + +void LLVMInitializeObjCARCOpts(LLVMPassRegistryRef R) { + initializeObjCARCOpts(*unwrap(R)); +} diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h new file mode 100644 index 0000000..e062b66 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -0,0 +1,389 @@ +//===- ObjCARC.h - ObjC ARC Optimization --------------*- mode: c++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines common definitions/declarations used by the ObjC ARC +/// Optimizer. ARC stands for Automatic Reference Counting and is a system for +/// managing reference counts for objects in Objective C. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_OBJCARC_H +#define LLVM_TRANSFORMS_SCALAR_OBJCARC_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Transforms/ObjCARC.h" +#include "llvm/Transforms/Utils/Local.h" + +namespace llvm { +class raw_ostream; +} + +namespace llvm { +namespace objcarc { + +/// \brief A handy option to enable/disable all ARC Optimizations. +extern bool EnableARCOpts; + +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. +static inline bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer"); +} + +/// \enum InstructionClass +/// \brief A simple classification for instructions. +enum InstructionClass { + IC_Retain, ///< objc_retain + IC_RetainRV, ///< objc_retainAutoreleasedReturnValue + IC_RetainBlock, ///< objc_retainBlock + IC_Release, ///< objc_release + IC_Autorelease, ///< objc_autorelease + IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue + IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush + IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop + IC_NoopCast, ///< objc_retainedObject, etc. + IC_FusedRetainAutorelease, ///< objc_retainAutorelease + IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue + IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) + IC_StoreWeak, ///< objc_storeWeak (primitive) + IC_InitWeak, ///< objc_initWeak (derived) + IC_LoadWeak, ///< objc_loadWeak (derived) + IC_MoveWeak, ///< objc_moveWeak (derived) + IC_CopyWeak, ///< objc_copyWeak (derived) + IC_DestroyWeak, ///< objc_destroyWeak (derived) + IC_StoreStrong, ///< objc_storeStrong (derived) + IC_CallOrUser, ///< could call objc_release and/or "use" pointers + IC_Call, ///< could call objc_release + IC_User, ///< could "use" a pointer + IC_None ///< anything else +}; + +raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class); + +/// \brief Test if the given class is objc_retain or equivalent. +static inline bool IsRetain(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV; +} + +/// \brief Test if the given class is objc_autorelease or equivalent. +static inline bool IsAutorelease(InstructionClass Class) { + return Class == IC_Autorelease || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. 
+static inline bool IsForwarding(InstructionClass Class) { + // objc_retainBlock technically doesn't always return its argument + // verbatim, but it doesn't matter for our purposes here. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock || + Class == IC_NoopCast; +} + +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. +static inline bool IsNoopOnNull(InstructionClass Class) { + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_RetainBlock; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +static inline bool IsAlwaysTail(InstructionClass Class) { + // IC_RetainBlock may be given a stack argument. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_AutoreleaseRV; +} + +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +static inline bool IsNeverTail(InstructionClass Class) { + /// It is never safe to tail call objc_autorelease since by tail calling + /// objc_autorelease, we also tail call -[NSObject autorelease] which supports + /// fast autoreleasing causing our object to be potentially reclaimed from the + /// autorelease pool which violates the semantics of __autoreleasing types in + /// ARC. + return Class == IC_Autorelease; +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +static inline bool IsNoThrow(InstructionClass Class) { + // objc_retainBlock is not nounwind because it calls user copy constructors + // which could theoretically throw. + return Class == IC_Retain || + Class == IC_RetainRV || + Class == IC_Release || + Class == IC_Autorelease || + Class == IC_AutoreleaseRV || + Class == IC_AutoreleasepoolPush || + Class == IC_AutoreleasepoolPop; +} + +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. +static inline bool +CanInterruptRV(InstructionClass Class) { + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_CallOrUser: + case IC_Call: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + return true; + default: + return false; + } +} + +/// \brief Determine if F is one of the special known Functions. If it isn't, +/// return IC_CallOrUser. +InstructionClass GetFunctionClass(const Function *F); + +/// \brief Determine which objc runtime call instruction class V belongs to. +/// +/// This is similar to GetInstructionClass except that it only detects objc +/// runtime calls. This allows it to be faster. +/// +static inline InstructionClass GetBasicInstructionClass(const Value *V) { + if (const CallInst *CI = dyn_cast<CallInst>(V)) { + if (const Function *F = CI->getCalledFunction()) + return GetFunctionClass(F); + // Otherwise, be conservative. + return IC_CallOrUser; + } + + // Otherwise, be conservative. + return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User; +} + +/// \brief Determine what kind of construct V is. +InstructionClass GetInstructionClass(const Value *V); + +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. 
+static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+  for (;;) {
+    V = GetUnderlyingObject(V);
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+
+  return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know
+/// to return their argument verbatim.
+static inline const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// \brief This is a wrapper around Value::stripPointerCasts which also knows
+/// how to look through objc_retain and objc_autorelease calls, which we know
+/// to return their argument verbatim.
+static inline Value *StripPointerCastsAndObjCCalls(Value *V) {
+  for (;;) {
+    V = V->stripPointerCasts();
+    if (!IsForwarding(GetBasicInstructionClass(V)))
+      break;
+    V = cast<CallInst>(V)->getArgOperand(0);
+  }
+  return V;
+}
+
+/// \brief Assuming the given instruction is one of the special calls such as
+/// objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static inline Value *GetObjCArg(Value *Inst) {
+  return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+static inline bool isNullOrUndef(const Value *V) {
+  return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static inline bool isNoopInstruction(const Instruction *I) {
+  return isa<BitCastInst>(I) ||
+         (isa<GetElementPtrInst>(I) &&
+          cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// \brief Erase the given instruction.
+///
+/// Many ObjC calls return their argument verbatim, so if it's such a call and
+/// the return value has users, replace them with the argument value.
+///
+static inline void EraseInstruction(Instruction *CI) {
+  Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+  bool Unused = CI->use_empty();
+
+  if (!Unused) {
+    // Replace the return value with the argument.
+    assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+           "Can't delete non-forwarding instruction with users!");
+    CI->replaceAllUsesWith(OldArg);
+  }
+
+  CI->eraseFromParent();
+
+  if (Unused)
+    RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// \brief Test whether the given value is possibly a retainable object
+/// pointer.
+static inline bool IsPotentialRetainableObjPtr(const Value *Op) {
+  // Pointers to static or stack storage are not valid retainable object
+  // pointers.
+  if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+    return false;
+  // Special arguments cannot be a valid retainable object pointer.
+  if (const Argument *Arg = dyn_cast<Argument>(Op))
+    if (Arg->hasByValAttr() ||
+        Arg->hasNestAttr() ||
+        Arg->hasStructRetAttr())
+      return false;
+  // Only consider values with pointer types.
+  //
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never retainable object pointers; however, clang
+  // occasionally bitcasts retainable object pointers to function-pointer
+  // type temporarily.
+  PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+  if (!Ty)
+    return false;
+  // Conservatively assume anything else is a potential retainable object
+  // pointer.
+  return true;
+}
+
+static inline bool IsPotentialRetainableObjPtr(const Value *Op,
+                                               AliasAnalysis &AA) {
+  // First make the rudimentary check.
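+  // For example, a pointer loaded from an instance variable passes the
+  // rudimentary syntactic check, but is still rejected below if alias
+  // analysis proves the load was from constant memory.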
+ if (!IsPotentialRetainableObjPtr(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + +/// \brief Helper for GetInstructionClass. Determines what kind of construct CS +/// is. +static inline InstructionClass GetCallSiteClass(ImmutableCallSite CS) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (IsPotentialRetainableObjPtr(*I)) + return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; + + return CS.onlyReadsMemory() ? IC_None : IC_Call; +} + +/// \brief Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. +static inline bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa<CallInst>(V) || isa<InvokeInst>(V) || + isa<Argument>(V) || isa<Constant>(V) || + isa<AllocaInst>(V)) + return true; + + if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { + const Value *Pointer = + StripPointerCastsAndObjCCalls(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || + Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || + Name.startswith("\01L_OBJC_METH_VAR_NAME_") || + Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + } + } + + return false; +} + +} // end namespace objcarc +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_OBJCARC_H diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp new file mode 100644 index 0000000..00d9864 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -0,0 +1,175 @@ +//===- ObjCARCAPElim.cpp - ObjC ARC Optimization --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// This specific file implements optimizations which remove extraneous +/// autorelease pools. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-ap-elim" +#include "ObjCARC.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::objcarc; + +namespace { + /// \brief Autorelease pool elimination. + class ObjCARCAPElim : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); + static bool OptimizeBB(BasicBlock *BB); + + public: + static char ID; + ObjCARCAPElim() : ModulePass(ID) { + initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCAPElim::ID = 0; +INITIALIZE_PASS(ObjCARCAPElim, + "objc-arc-apelim", + "ObjC ARC autorelease pool elimination", + false, false) + +Pass *llvm::createObjCARCAPElimPass() { + return new ObjCARCAPElim(); +} + +void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +/// Interprocedurally determine if calls made by the given call site can +/// possibly produce autoreleases. +bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { + if (const Function *Callee = CS.getCalledFunction()) { + if (Callee->isDeclaration() || Callee->mayBeOverridden()) + return true; + for (Function::const_iterator I = Callee->begin(), E = Callee->end(); + I != E; ++I) { + const BasicBlock *BB = I; + for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); + J != F; ++J) + if (ImmutableCallSite JCS = ImmutableCallSite(J)) + // This recursion depth limit is arbitrary. It's just great + // enough to cover known interesting testcases. + if (Depth < 3 && + !JCS.onlyReadsMemory() && + MayAutorelease(JCS, Depth + 1)) + return true; + } + return false; + } + + return true; +} + +bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { + bool Changed = false; + + Instruction *Push = 0; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { + Instruction *Inst = I++; + switch (GetBasicInstructionClass(Inst)) { + case IC_AutoreleasepoolPush: + Push = Inst; + break; + case IC_AutoreleasepoolPop: + // If this pop matches a push and nothing in between can autorelease, + // zap the pair. + if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) { + Changed = true; + DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop " + "autorelease pair:\n" + " Pop: " << *Inst << "\n" + << " Push: " << *Push << "\n"); + Inst->eraseFromParent(); + Push->eraseFromParent(); + } + Push = 0; + break; + case IC_CallOrUser: + if (MayAutorelease(ImmutableCallSite(Inst))) + Push = 0; + break; + default: + break; + } + } + + return Changed; +} + +bool ObjCARCAPElim::runOnModule(Module &M) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!ModuleHasARC(M)) + return false; + + // Find the llvm.global_ctors variable, as the first step in + // identifying the global constructors. In theory, unnecessary autorelease + // pools could occur anywhere, but in practice it's pretty rare. Global + // ctors are a place where autorelease pools get inserted automatically, + // so it's pretty common for them to be unnecessary, and it's pretty + // profitable to eliminate them. 
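+  // In IR, the variable holds (priority, constructor) pairs:
+  //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
+  //     [{ i32, void ()* } { i32 65535, void ()* @ctor }]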
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return false; + + assert(GV->hasDefinitiveInitializer() && + "llvm.global_ctors is uncooperative!"); + + bool Changed = false; + + // Dig the constructor functions out of GV's initializer. + ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); + for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); + OI != OE; ++OI) { + Value *Op = *OI; + // llvm.global_ctors is an array of pairs where the second members + // are constructor functions. + Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); + // If the user used a constructor function with the wrong signature and + // it got bitcasted or whatever, look the other way. + if (!F) + continue; + // Only look at function definitions. + if (F->isDeclaration()) + continue; + // Only look at functions with one basic block. + if (llvm::next(F->begin()) != F->end()) + continue; + // Ok, a single-block constructor function definition. Try to optimize it. + Changed |= OptimizeBB(F->begin()); + } + + return Changed; +} diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp new file mode 100644 index 0000000..46b2de7 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -0,0 +1,162 @@ +//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -*- mode: c++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-aa" +#include "ObjCARC.h" +#include "ObjCARCAliasAnalysis.h" +#include "llvm/IR/Instruction.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" + +namespace llvm { + class Function; + class Value; +} + +using namespace llvm; +using namespace llvm::objcarc; + +// Register this pass... +char ObjCARCAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { + return new ObjCARCAliasAnalysis(); +} + +void +ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { + if (!EnableARCOpts) + return AliasAnalysis::alias(LocA, LocB); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making a + // precise alias query. 
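+  // For instance, when LocA.Ptr is "%1 = call i8* @objc_retain(i8* %p)",
+  // the stripped query is made on %p itself, which the chained analysis can
+  // often answer even though it knows nothing about %1.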
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); + const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); + AliasResult Result = + AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), + Location(SB, LocB.Size, LocB.TBAATag)); + if (Result != MayAlias) + return Result; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *UA = GetUnderlyingObjCPtr(SA); + const Value *UB = GetUnderlyingObjCPtr(SB); + if (UA != SA || UB != SB) { + Result = AliasAnalysis::alias(Location(UA), Location(UB)); + // We can't use MustAlias or PartialAlias results here because + // GetUnderlyingObjCPtr may return an offsetted pointer value. + if (Result == NoAlias) + return NoAlias; + } + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return MayAlias; +} + +bool +ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableARCOpts) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // First, strip off no-ops, including ObjC-specific no-ops, and try making + // a precise alias query. + const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); + if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), + OrLocal)) + return true; + + // If that failed, climb to the underlying object, including climbing through + // ObjC-specific no-ops, and try making an imprecise alias query. + const Value *U = GetUnderlyingObjCPtr(S); + if (U != S) + return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); + + // If that failed, fail. We don't need to chain here, since that's covered + // by the earlier precise query. + return false; +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // We have nothing to do. Just chain to the next AliasAnalysis. + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefBehavior(F); + + switch (GetFunctionClass(F)) { + case IC_NoopCast: + return DoesNotAccessMemory; + default: + break; + } + + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableARCOpts) + return AliasAnalysis::getModRefInfo(CS, Loc); + + switch (GetBasicInstructionClass(CS.getInstruction())) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_NoopCast: + case IC_AutoreleasepoolPush: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + // These functions don't access any memory visible to the compiler. + // Note that this doesn't include objc_retainBlock, because it updates + // pointers when it copies block data. + return NoModRef; + default: + break; + } + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // TODO: Theoretically we could check for dependencies between objc_* calls + // and OnlyAccessesArgumentPointees calls or other well-behaved calls. 
+ return AliasAnalysis::getModRefInfo(CS1, CS2); +} diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h new file mode 100644 index 0000000..7abe995 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h @@ -0,0 +1,74 @@ +//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- mode: c++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H +#define LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" + +namespace llvm { +namespace objcarc { + + /// \brief This is a simple alias analysis implementation that uses knowledge + /// of ARC constructs to answer queries. + /// + /// TODO: This class could be generalized to know about other ObjC-specific + /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing + /// even though their offsets are dynamic. + class ObjCARCAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + ObjCARCAliasAnalysis() : ImmutablePass(ID) { + initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + private: + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// This method is used when a pass implements an analysis interface through + /// multiple inheritance. If needed, it should override this to adjust the + /// this pointer as needed for the specified pass info. 
+ virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return static_cast<AliasAnalysis *>(this); + return this; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; + +} // namespace objcarc +} // namespace llvm + +#endif // LLVM_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp new file mode 100644 index 0000000..1c13d1c --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -0,0 +1,537 @@ +//===- ObjCARCContract.cpp - ObjC ARC Optimization ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines late ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// This specific file mainly deals with ``contracting'' multiple lower level +/// operations into singular higher level operations through pattern matching. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +// TODO: ObjCARCContract could insert PHI nodes when uses aren't +// dominated by single calls. + +#define DEBUG_TYPE "objc-arc-contract" +#include "ObjCARC.h" +#include "DependencyAnalysis.h" +#include "ProvenanceAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::objcarc; + +STATISTIC(NumPeeps, "Number of calls peephole-optimized"); +STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); + +namespace { + /// \brief Late ARC optimizations + /// + /// These change the IR in a way that makes it difficult to be analyzed by + /// ObjCARCOpt, so it's run late. + class ObjCARCContract : public FunctionPass { + bool Changed; + AliasAnalysis *AA; + DominatorTree *DT; + ProvenanceAnalysis PA; + + /// A flag indicating whether this optimization pass should run. + bool Run; + + /// Declarations for ObjC runtime functions, for use in creating calls to + /// them. These are initialized lazily to avoid cluttering up the Module + /// with unused declarations. + + /// Declaration for objc_storeStrong(). + Constant *StoreStrongCallee; + /// Declaration for objc_retainAutorelease(). + Constant *RetainAutoreleaseCallee; + /// Declaration for objc_retainAutoreleaseReturnValue(). 
+ Constant *RetainAutoreleaseRVCallee; + + /// The inline asm string to insert between calls and RetainRV calls to make + /// the optimization work on targets which need it. + const MDString *RetainRVMarker; + + /// The set of inserted objc_storeStrong calls. If at the end of walking the + /// function we have found no alloca instructions, these calls can be marked + /// "tail". + SmallPtrSet<CallInst *, 8> StoreStrongCalls; + + Constant *getStoreStrongCallee(Module *M); + Constant *getRetainAutoreleaseCallee(Module *M); + Constant *getRetainAutoreleaseRVCallee(Module *M); + + bool ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited); + + void ContractRelease(Instruction *Release, + inst_iterator &Iter); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + public: + static char ID; + ObjCARCContract() : FunctionPass(ID) { + initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCContract::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ObjCARCContract, + "objc-arc-contract", "ObjC ARC contraction", false, false) + +Pass *llvm::createObjCARCContractPass() { + return new ObjCARCContract(); +} + +void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.setPreservesCFG(); +} + +Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { + if (!StoreStrongCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = { I8XX, I8X }; + + AttributeSet Attr = AttributeSet() + .addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind) + .addAttribute(M->getContext(), 1, Attribute::NoCapture); + + StoreStrongCallee = + M->getOrInsertFunction( + "objc_storeStrong", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attr); + } + return StoreStrongCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { + if (!RetainAutoreleaseCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseCallee = + M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); + } + return RetainAutoreleaseCallee; +} + +Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { + if (!RetainAutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainAutoreleaseRVCallee = + M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, + Attribute); + } + return RetainAutoreleaseRVCallee; +} + +/// Merge an autorelease with a retain into a fused call. 
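+/// For example (illustrative IR):
+///
+///   %1 = call i8* @objc_retain(i8* %p)
+///   ...                              ; nothing here may touch the count
+///   call i8* @objc_autorelease(i8* %p)
+///
+/// is fused by rewriting the retain's callee and erasing the autorelease:
+///
+///   %1 = call i8* @objc_retainAutorelease(i8* %p)
+///
+/// (or objc_retainAutoreleaseReturnValue when the second call was an
+/// autoreleaseRV).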
+bool +ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, + InstructionClass Class, + SmallPtrSet<Instruction *, 4> + &DependingInstructions, + SmallPtrSet<const BasicBlock *, 4> + &Visited) { + const Value *Arg = GetObjCArg(Autorelease); + + // Check that there are no instructions between the retain and the autorelease + // (such as an autorelease_pop) which may change the count. + CallInst *Retain = 0; + if (Class == IC_AutoreleaseRV) + FindDependencies(RetainAutoreleaseRVDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + else + FindDependencies(RetainAutoreleaseDep, Arg, + Autorelease->getParent(), Autorelease, + DependingInstructions, Visited, PA); + + Visited.clear(); + if (DependingInstructions.size() != 1) { + DependingInstructions.clear(); + return false; + } + + Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); + DependingInstructions.clear(); + + if (!Retain || + GetBasicInstructionClass(Retain) != IC_Retain || + GetObjCArg(Retain) != Arg) + return false; + + Changed = true; + ++NumPeeps; + + DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " + "retain/autorelease. Erasing: " << *Autorelease << "\n" + " Old Retain: " + << *Retain << "\n"); + + if (Class == IC_AutoreleaseRV) + Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); + else + Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); + + DEBUG(dbgs() << " New Retain: " + << *Retain << "\n"); + + EraseInstruction(Autorelease); + return true; +} + +/// Attempt to merge an objc_release with a store, load, and objc_retain to form +/// an objc_storeStrong. This can be a little tricky because the instructions +/// don't always appear in order, and there may be unrelated intervening +/// instructions. +void ObjCARCContract::ContractRelease(Instruction *Release, + inst_iterator &Iter) { + LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); + if (!Load || !Load->isSimple()) return; + + // For now, require everything to be in one basic block. + BasicBlock *BB = Release->getParent(); + if (Load->getParent() != BB) return; + + // Walk down to find the store and the release, which may be in either order. + BasicBlock::iterator I = Load, End = BB->end(); + ++I; + AliasAnalysis::Location Loc = AA->getLocation(Load); + StoreInst *Store = 0; + bool SawRelease = false; + for (; !Store || !SawRelease; ++I) { + if (I == End) + return; + + Instruction *Inst = I; + if (Inst == Release) { + SawRelease = true; + continue; + } + + InstructionClass Class = GetBasicInstructionClass(Inst); + + // Unrelated retains are harmless. + if (IsRetain(Class)) + continue; + + if (Store) { + // The store is the point where we're going to put the objc_storeStrong, + // so make sure there are no uses after it. + if (CanUse(Inst, Load, PA, Class)) + return; + } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { + // We are moving the load down to the store, so check for anything + // else which writes to the memory between the load and the store. + Store = dyn_cast<StoreInst>(Inst); + if (!Store || !Store->isSimple()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + } + } + + Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); + + // Walk up to find the retain. 
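+  // The pattern assembled so far, in illustrative IR:
+  //   %old = load i8** %p
+  //   ...
+  //   store i8* %new, i8** %p
+  //   call void @objc_release(i8* %old)
+  // A retain of %new above the store completes it, and the whole sequence
+  // collapses into: call void @objc_storeStrong(i8** %p, i8* %new)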
+ I = Store; + BasicBlock::iterator Begin = BB->begin(); + while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) + --I; + Instruction *Retain = I; + if (GetBasicInstructionClass(Retain) != IC_Retain) return; + if (GetObjCArg(Retain) != New) return; + + Changed = true; + ++NumStoreStrongs; + + LLVMContext &C = Release->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + + Value *Args[] = { Load->getPointerOperand(), New }; + if (Args[0]->getType() != I8XX) + Args[0] = new BitCastInst(Args[0], I8XX, "", Store); + if (Args[1]->getType() != I8X) + Args[1] = new BitCastInst(Args[1], I8X, "", Store); + CallInst *StoreStrong = + CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), + Args, "", Store); + StoreStrong->setDoesNotThrow(); + StoreStrong->setDebugLoc(Store->getDebugLoc()); + + // We can't set the tail flag yet, because we haven't yet determined + // whether there are any escaping allocas. Remember this call, so that + // we can set the tail flag once we know it's safe. + StoreStrongCalls.insert(StoreStrong); + + if (&*Iter == Store) ++Iter; + Store->eraseFromParent(); + Release->eraseFromParent(); + EraseInstruction(Retain); + if (Load->use_empty()) + Load->eraseFromParent(); +} + +bool ObjCARCContract::doInitialization(Module &M) { + // If nothing in the Module uses ARC, don't do anything. + Run = ModuleHasARC(M); + if (!Run) + return false; + + // These are initialized lazily. + StoreStrongCallee = 0; + RetainAutoreleaseCallee = 0; + RetainAutoreleaseRVCallee = 0; + + // Initialize RetainRVMarker. + RetainRVMarker = 0; + if (NamedMDNode *NMD = + M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) + if (NMD->getNumOperands() == 1) { + const MDNode *N = NMD->getOperand(0); + if (N->getNumOperands() == 1) + if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) + RetainRVMarker = S; + } + + return false; +} + +bool ObjCARCContract::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + Changed = false; + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + + PA.setAA(&getAnalysis<AliasAnalysis>()); + + // Track whether it's ok to mark objc_storeStrong calls with the "tail" + // keyword. Be conservative if the function has variadic arguments. + // It seems that functions which "return twice" are also unsafe for the + // "tail" argument, because they are setjmp, which could need to + // return to an earlier stack state. + bool TailOkForStoreStrongs = !F.isVarArg() && + !F.callsFunctionThatReturnsTwice(); + + // For ObjC library calls which return their argument, replace uses of the + // argument with uses of the call return value, if it dominates the use. This + // reduces register pressure. + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); + + // Only these library routines return their argument. In particular, + // objc_retainBlock does not necessarily return its argument. 
+ InstructionClass Class = GetBasicInstructionClass(Inst); + switch (Class) { + case IC_Retain: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: + break; + case IC_Autorelease: + case IC_AutoreleaseRV: + if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) + continue; + break; + case IC_RetainRV: { + // If we're compiling for a target which needs a special inline-asm + // marker to do the retainAutoreleasedReturnValue optimization, + // insert it now. + if (!RetainRVMarker) + break; + BasicBlock::iterator BBI = Inst; + BasicBlock *InstParent = Inst->getParent(); + + // Step up to see if the call immediately precedes the RetainRV call. + // If it's an invoke, we have to cross a block boundary. And we have + // to carefully dodge no-op instructions. + do { + if (&*BBI == InstParent->begin()) { + BasicBlock *Pred = InstParent->getSinglePredecessor(); + if (!Pred) + goto decline_rv_optimization; + BBI = Pred->getTerminator(); + break; + } + --BBI; + } while (isNoopInstruction(BBI)); + + if (&*BBI == GetObjCArg(Inst)) { + DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " + "retainAutoreleasedReturnValue optimization.\n"); + Changed = true; + InlineAsm *IA = + InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), + /*isVarArg=*/false), + RetainRVMarker->getString(), + /*Constraints=*/"", /*hasSideEffects=*/true); + CallInst::Create(IA, "", Inst); + } + decline_rv_optimization: + break; + } + case IC_InitWeak: { + // objc_initWeak(p, null) => *p = null + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(1))) { + Value *Null = + ConstantPointerNull::get(cast<PointerType>(CI->getType())); + Changed = true; + new StoreInst(Null, CI->getArgOperand(0), CI); + + DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" + << " New = " << *Null << "\n"); + + CI->replaceAllUsesWith(Null); + CI->eraseFromParent(); + } + continue; + } + case IC_Release: + ContractRelease(Inst, I); + continue; + case IC_User: + // Be conservative if the function has any alloca instructions. + // Technically we only care about escaping alloca instructions, + // but this is sufficient to handle some interesting cases. + if (isa<AllocaInst>(Inst)) + TailOkForStoreStrongs = false; + continue; + default: + continue; + } + + DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); + + // Don't use GetObjCArg because we don't want to look through bitcasts + // and such; to do the replacement, the argument must have type i8*. + const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); + for (;;) { + // If we're compiling bugpointed code, don't get in trouble. + if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) + break; + // Look through the uses of the pointer. + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ) { + Use &U = UI.getUse(); + unsigned OperandNo = UI.getOperandNo(); + ++UI; // Increment UI now, because we may unlink its element. + + // If the call's return value dominates a use of the call's argument + // value, rewrite the use to use the return value. We check for + // reachability here because an unreachable call is considered to + // trivially dominate itself, which would lead us to rewriting its + // argument in terms of its return value, which would lead to + // infinite loops in GetObjCArg. 
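+      // For example, given "%2 = call i8* @objc_retain(i8* %x)", a later
+      // use of %x dominated by the call can be rewritten to use %2,
+      // reducing register pressure as noted above.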
+ if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { + Changed = true; + Instruction *Replacement = Inst; + Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { + // For PHI nodes, insert the bitcast in the predecessor block. + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + // While we're here, rewrite all edges for this PHI, rather + // than just one use at a time, to minimize the number of + // bitcasts we emit. + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + cast<Instruction>(U.getUser())); + U.set(Replacement); + } + } + } + + // If Arg is a no-op casted pointer, strip one level of casts and iterate. + if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) + Arg = BI->getOperand(0); + else if (isa<GEPOperator>(Arg) && + cast<GEPOperator>(Arg)->hasAllZeroIndices()) + Arg = cast<GEPOperator>(Arg)->getPointerOperand(); + else if (isa<GlobalAlias>(Arg) && + !cast<GlobalAlias>(Arg)->mayBeOverridden()) + Arg = cast<GlobalAlias>(Arg)->getAliasee(); + else + break; + } + } + + // If this function has no escaping allocas or suspicious vararg usage, + // objc_storeStrong calls can be marked with the "tail" keyword. + if (TailOkForStoreStrongs) + for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), + E = StoreStrongCalls.end(); I != E; ++I) + (*I)->setTailCall(); + StoreStrongCalls.clear(); + + return Changed; +} diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp new file mode 100644 index 0000000..39bf8f3 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -0,0 +1,128 @@ +//===- ObjCARCExpand.cpp - ObjC ARC Optimization --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. +/// +/// This specific file deals with early optimizations which perform certain +/// cleanup operations. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-expand" + +#include "ObjCARC.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class Module; +} + +using namespace llvm; +using namespace llvm::objcarc; + +namespace { + /// \brief Early ARC transformations. + class ObjCARCExpand : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + + /// A flag indicating whether this optimization pass should run. + bool Run; + + public: + static char ID; + ObjCARCExpand() : FunctionPass(ID) { + initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCExpand::ID = 0; +INITIALIZE_PASS(ObjCARCExpand, + "objc-arc-expand", "ObjC ARC expansion", false, false) + +Pass *llvm::createObjCARCExpandPass() { + return new ObjCARCExpand(); +} + +void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +bool ObjCARCExpand::doInitialization(Module &M) { + Run = ModuleHasARC(M); + return false; +} + +bool ObjCARCExpand::runOnFunction(Function &F) { + if (!EnableARCOpts) + return false; + + // If nothing in the Module uses ARC, don't do anything. + if (!Run) + return false; + + bool Changed = false; + + DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName() << "\n"); + + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + Instruction *Inst = &*I; + + DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n"); + + switch (GetBasicInstructionClass(Inst)) { + case IC_Retain: + case IC_RetainRV: + case IC_Autorelease: + case IC_AutoreleaseRV: + case IC_FusedRetainAutorelease: + case IC_FusedRetainAutoreleaseRV: { + // These calls return their argument verbatim, as a low-level + // optimization. However, this makes high-level optimizations + // harder. Undo any uses of this optimization that the front-end + // emitted here. We'll redo them in the contract pass. + Changed = true; + Value *Value = cast<CallInst>(Inst)->getArgOperand(0); + DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n" + " New = " << *Value << "\n"); + Inst->replaceAllUsesWith(Value); + break; + } + default: + break; + } + } + + DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n"); + + return Changed; +} diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp new file mode 100644 index 0000000..9c14949 --- /dev/null +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -0,0 +1,2691 @@ +//===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines ObjC ARC optimizations. ARC stands for Automatic +/// Reference Counting and is a system for managing reference counts for objects +/// in Objective C. 
+/// +/// The optimizations performed include elimination of redundant, partially +/// redundant, and inconsequential reference count operations, elimination of +/// redundant weak pointer operations, and numerous minor simplifications. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "objc-arc-opts" +#include "ObjCARC.h" +#include "DependencyAnalysis.h" +#include "ObjCARCAliasAnalysis.h" +#include "ProvenanceAnalysis.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. +/// @{ + +namespace { + /// \brief An associative container with fast insertion-order (deterministic) + /// iteration over its elements. Plus the special blot operation. + template<class KeyT, class ValueT> + class MapVector { + /// Map keys to indices in Vector. + typedef DenseMap<KeyT, size_t> MapTy; + MapTy Map; + + typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; + /// Keys and values. + VectorTy Vector; + + public: + typedef typename VectorTy::iterator iterator; + typedef typename VectorTy::const_iterator const_iterator; + iterator begin() { return Vector.begin(); } + iterator end() { return Vector.end(); } + const_iterator begin() const { return Vector.begin(); } + const_iterator end() const { return Vector.end(); } + +#ifdef XDEBUG + ~MapVector() { + assert(Vector.size() >= Map.size()); // May differ due to blotting. 
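+      // The two loops below check both directions of the invariant: every
+      // map entry must index the vector slot holding its key, and every
+      // vector entry must either be blotted (defaulted key) or map back to
+      // its own index.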
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); + I != E; ++I) { + assert(I->second < Vector.size()); + assert(Vector[I->second].first == I->first); + } + for (typename VectorTy::const_iterator I = Vector.begin(), + E = Vector.end(); I != E; ++I) + assert(!I->first || + (Map.count(I->first) && + Map[I->first] == size_t(I - Vector.begin()))); + } +#endif + + ValueT &operator[](const KeyT &Arg) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Arg, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(std::make_pair(Arg, ValueT())); + return Vector[Num].second; + } + return Vector[Pair.first->second].second; + } + + std::pair<iterator, bool> + insert(const std::pair<KeyT, ValueT> &InsertPair) { + std::pair<typename MapTy::iterator, bool> Pair = + Map.insert(std::make_pair(InsertPair.first, size_t(0))); + if (Pair.second) { + size_t Num = Vector.size(); + Pair.first->second = Num; + Vector.push_back(InsertPair); + return std::make_pair(Vector.begin() + Num, true); + } + return std::make_pair(Vector.begin() + Pair.first->second, false); + } + + const_iterator find(const KeyT &Key) const { + typename MapTy::const_iterator It = Map.find(Key); + if (It == Map.end()) return Vector.end(); + return Vector.begin() + It->second; + } + + /// This is similar to erase, but instead of removing the element from the + /// vector, it just zeros out the key in the vector. This leaves iterators + /// intact, but clients must be prepared for zeroed-out keys when iterating. + void blot(const KeyT &Key) { + typename MapTy::iterator It = Map.find(Key); + if (It == Map.end()) return; + Vector[It->second].first = KeyT(); + Map.erase(It); + } + + void clear() { + Map.clear(); + Vector.clear(); + } + }; +} + +/// @} +/// +/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. +/// @{ + +/// \brief This is similar to StripPointerCastsAndObjCCalls but it stops as soon +/// as it finds a value with multiple uses. +static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { + if (Arg->hasOneUse()) { + if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) + return FindSingleUseIdentifiedObject(BC->getOperand(0)); + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) + if (GEP->hasAllZeroIndices()) + return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); + if (IsForwarding(GetBasicInstructionClass(Arg))) + return FindSingleUseIdentifiedObject( + cast<CallInst>(Arg)->getArgOperand(0)); + if (!IsObjCIdentifiedObject(Arg)) + return 0; + return Arg; + } + + // If we found an identifiable object but it has multiple uses, but they are + // trivial uses, we can still consider this to be a single-use value. + if (IsObjCIdentifiedObject(Arg)) { + for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) + return 0; + } + + return Arg; + } + + return 0; +} + +/// \brief Test whether the given retainable object pointer escapes. +/// +/// This differs from regular escape analysis in that a use as an +/// argument to a call is not considered an escape. +/// +static bool DoesRetainableObjPtrEscape(const User *Ptr) { + DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Target: " << *Ptr << "\n"); + + // Walk the def-use chains. + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(Ptr); + // If Ptr has any operands add them as well. 
+  for (User::const_op_iterator I = Ptr->op_begin(), E = Ptr->op_end(); I != E;
+       ++I) {
+    Worklist.push_back(*I);
+  }
+
+  // Ensure we do not visit any value twice.
+  SmallPtrSet<const Value *, 8> VisitedSet;
+
+  do {
+    const Value *V = Worklist.pop_back_val();
+
+    DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+
+    for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+         UI != UE; ++UI) {
+      const User *UUser = *UI;
+
+      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+
+      // Special - Use by a call (callee or argument) is not considered
+      // to be an escape.
+      switch (GetBasicInstructionClass(UUser)) {
+      case IC_StoreWeak:
+      case IC_InitWeak:
+      case IC_StoreStrong:
+      case IC_Autorelease:
+      case IC_AutoreleaseRV: {
+        DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
+              "arguments. Pointer Escapes!\n");
+        // These special functions make copies of their pointer arguments.
+        return true;
+      }
+      case IC_User:
+      case IC_None:
+        // Use by an instruction which copies the value is an escape if the
+        // result is an escape.
+        if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+            isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+
+          if (VisitedSet.insert(UUser)) {
+            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
+                  "Ptr escapes if result escapes. Adding to list.\n");
+            Worklist.push_back(UUser);
+          } else {
+            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
+                  "\n");
+          }
+          continue;
+        }
+        // Use by a load is not an escape.
+        if (isa<LoadInst>(UUser))
+          continue;
+        // Use by a store is not an escape if the use is the address.
+        if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+          if (V != SI->getValueOperand())
+            continue;
+        break;
+      default:
+        // Regular calls and other stuff are not considered escapes.
+        continue;
+      }
+      // Otherwise, conservatively assume an escape.
+      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+      return true;
+    }
+  } while (!Worklist.empty());
+
+  // No escapes found.
+  DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+  return false;
+}
+
+/// @}
+///
+/// \defgroup ARCOpt ARC Optimization.
+/// @{
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be Interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
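The optimizer below models each tracked pointer with the small Sequence state machine and combines per-path states at CFG joins via MergeSeqs. A minimal usage sketch, assuming the Sequence and MergeSeqs definitions that follow (the chosen values are illustrative):

    // Bottom-up, one incoming path is still at S_Release while the other has
    // already advanced to S_Use. The "choose the side which is further along"
    // rule keeps S_Use for the merged state.
    Sequence Merged = MergeSeqs(S_Use, S_Release, /*TopDown=*/false);
    assert(Merged == S_Use);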
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+// TODO: Delete release+retain pairs (rare).
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+                   "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+  /// \enum Sequence
+  ///
+  /// \brief A sequence of states that a pointer may go through in which an
+  /// objc_retain and objc_release are actually needed.
+  enum Sequence {
+    S_None,
+    S_Retain,        ///< objc_retain(x).
+    S_CanRelease,    ///< foo(x) -- x could possibly see a ref count decrement.
+    S_Use,           ///< any use of x.
+    S_Stop,          ///< like S_Release, but code motion is stopped.
+    S_Release,       ///< objc_release(x).
+    S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+  };
+
+  raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
+    LLVM_ATTRIBUTE_UNUSED;
+  raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
+    switch (S) {
+    case S_None:
+      return OS << "S_None";
+    case S_Retain:
+      return OS << "S_Retain";
+    case S_CanRelease:
+      return OS << "S_CanRelease";
+    case S_Use:
+      return OS << "S_Use";
+    case S_Release:
+      return OS << "S_Release";
+    case S_MovableRelease:
+      return OS << "S_MovableRelease";
+    case S_Stop:
+      return OS << "S_Stop";
+    }
+    llvm_unreachable("Unknown sequence type.");
+  }
+}
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+  // The easy cases.
+  if (A == B)
+    return A;
+  if (A == S_None || B == S_None)
+    return S_None;
+
+  if (A > B) std::swap(A, B);
+  if (TopDown) {
+    // Choose the side which is further along in the sequence.
+    if ((A == S_Retain || A == S_CanRelease) &&
+        (B == S_CanRelease || B == S_Use))
+      return B;
+  } else {
+    // Choose the side which is further along in the sequence.
+    if ((A == S_Use || A == S_CanRelease) &&
+        (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+      return A;
+    // If both sides are releases, choose the more conservative one.
+    if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+      return A;
+    if (A == S_Release && B == S_MovableRelease)
+      return A;
+  }
+
+  return S_None;
+}
+
+namespace {
+  /// \brief Unidirectional information about either a
+  /// retain-decrement-use-release sequence or release-use-decrement-retain
+  /// reverse sequence.
+  struct RRInfo {
+    /// After an objc_retain, the reference count of the referenced
+    /// object is known to be positive. Similarly, before an objc_release, the
+    /// reference count of the referenced object is known to be positive. If
+    /// there are retain-release pairs in code regions where the retain count
+    /// is known to be positive, they can be eliminated, regardless of any side
+    /// effects between them.
+    ///
+    /// Also, a retain+release pair nested within another retain+release
+    /// pair all on the known same pointer value can be eliminated, regardless
+    /// of any intervening side effects.
+    ///
+    /// KnownSafe is true when either of these conditions is satisfied.
+    bool KnownSafe;
+
+    /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain
+    /// calls).
+    bool IsRetainBlock;
+
+    /// True if the objc_release calls are all marked with the "tail" keyword.
+    bool IsTailCallRelease;
+
+    /// If the Calls are objc_release calls and they all have a
+    /// clang.imprecise_release tag, this is the metadata tag.
+    MDNode *ReleaseMetadata;
+
+    /// For a top-down sequence, the set of objc_retains or
+    /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+    SmallPtrSet<Instruction *, 2> Calls;
+
+    /// The set of optimal insert positions for moving calls in the opposite
+    /// sequence.
+    SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+    RRInfo() :
+      KnownSafe(false), IsRetainBlock(false),
+      IsTailCallRelease(false),
+      ReleaseMetadata(0) {}
+
+    void clear();
+  };
+}
+
+void RRInfo::clear() {
+  KnownSafe = false;
+  IsRetainBlock = false;
+  IsTailCallRelease = false;
+  ReleaseMetadata = 0;
+  Calls.clear();
+  ReverseInsertPts.clear();
+}
+
+namespace {
+  /// \brief This class summarizes several per-pointer runtime properties which
+  /// are propagated through the flow graph.
+  class PtrState {
+    /// True if the reference count is known to be incremented.
+    bool KnownPositiveRefCount;
+
+    /// True if we've seen an opportunity for partial RR elimination, such as
+    /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+    bool Partial;
+
+    /// The current position in the sequence.
+    Sequence Seq : 8;
+
+  public:
+    /// Unidirectional information about the current sequence.
+    ///
+    /// TODO: Encapsulate this better.
+    RRInfo RRI;
+
+    PtrState() : KnownPositiveRefCount(false), Partial(false),
+                 Seq(S_None) {}
+
+    void SetKnownPositiveRefCount() {
+      KnownPositiveRefCount = true;
+    }
+
+    void ClearRefCount() {
+      KnownPositiveRefCount = false;
+    }
+
+    bool IsKnownIncremented() const {
+      return KnownPositiveRefCount;
+    }
+
+    void SetSeq(Sequence NewSeq) {
+      Seq = NewSeq;
+    }
+
+    Sequence GetSeq() const {
+      return Seq;
+    }
+
+    void ClearSequenceProgress() {
+      ResetSequenceProgress(S_None);
+    }
+
+    void ResetSequenceProgress(Sequence NewSeq) {
+      Seq = NewSeq;
+      Partial = false;
+      RRI.clear();
+    }
+
+    void Merge(const PtrState &Other, bool TopDown);
+  };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+  Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+  KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount;
+
+  // We can't merge a plain objc_retain with an objc_retainBlock.
+  if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+    Seq = S_None;
+
+  // If we're not in a sequence (anymore), drop all associated state.
+  if (Seq == S_None) {
+    Partial = false;
+    RRI.clear();
+  } else if (Partial || Other.Partial) {
+    // If we're doing a merge on a path that's previously seen a partial
+    // merge, conservatively drop the sequence, to avoid doing partial
+    // RR elimination. If the branch predicates for the two merges differ,
+    // mixing them is unsafe.
+    ClearSequenceProgress();
+  } else {
+    // Conservatively merge the ReleaseMetadata information.
+    if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+      RRI.ReleaseMetadata = 0;
+
+    RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
+    RRI.IsTailCallRelease = RRI.IsTailCallRelease &&
+                            Other.RRI.IsTailCallRelease;
+    RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+
+    // Merge the insert point sets. If there are any differences,
+    // that makes this a partial merge.
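+    // (A size mismatch, or any insert point below that was not already
+    // present, means the two sets differ and the merge is partial.)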
+ Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); + for (SmallPtrSet<Instruction *, 2>::const_iterator + I = Other.RRI.ReverseInsertPts.begin(), + E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) + Partial |= RRI.ReverseInsertPts.insert(*I); + } +} + +namespace { + /// \brief Per-BasicBlock state. + class BBState { + /// The number of unique control paths from the entry which can reach this + /// block. + unsigned TopDownPathCount; + + /// The number of unique control paths to exits from this block. + unsigned BottomUpPathCount; + + /// A type for PerPtrTopDown and PerPtrBottomUp. + typedef MapVector<const Value *, PtrState> MapTy; + + /// The top-down traversal uses this to record information known about a + /// pointer at the bottom of each block. + MapTy PerPtrTopDown; + + /// The bottom-up traversal uses this to record information known about a + /// pointer at the top of each block. + MapTy PerPtrBottomUp; + + /// Effective predecessors of the current block ignoring ignorable edges and + /// ignored backedges. + SmallVector<BasicBlock *, 2> Preds; + /// Effective successors of the current block ignoring ignorable edges and + /// ignored backedges. + SmallVector<BasicBlock *, 2> Succs; + + public: + BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} + + typedef MapTy::iterator ptr_iterator; + typedef MapTy::const_iterator ptr_const_iterator; + + ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } + ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } + ptr_const_iterator top_down_ptr_begin() const { + return PerPtrTopDown.begin(); + } + ptr_const_iterator top_down_ptr_end() const { + return PerPtrTopDown.end(); + } + + ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } + ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } + ptr_const_iterator bottom_up_ptr_begin() const { + return PerPtrBottomUp.begin(); + } + ptr_const_iterator bottom_up_ptr_end() const { + return PerPtrBottomUp.end(); + } + + /// Mark this block as being an entry block, which has one path from the + /// entry by definition. + void SetAsEntry() { TopDownPathCount = 1; } + + /// Mark this block as being an exit block, which has one path to an exit by + /// definition. + void SetAsExit() { BottomUpPathCount = 1; } + + PtrState &getPtrTopDownState(const Value *Arg) { + return PerPtrTopDown[Arg]; + } + + PtrState &getPtrBottomUpState(const Value *Arg) { + return PerPtrBottomUp[Arg]; + } + + void clearBottomUpPointers() { + PerPtrBottomUp.clear(); + } + + void clearTopDownPointers() { + PerPtrTopDown.clear(); + } + + void InitFromPred(const BBState &Other); + void InitFromSucc(const BBState &Other); + void MergePred(const BBState &Other); + void MergeSucc(const BBState &Other); + + /// Return the number of possible unique paths from an entry to an exit + /// which pass through this block. This is only valid after both the + /// top-down and bottom-up traversals are complete. + unsigned GetAllPathCount() const { + assert(TopDownPathCount != 0); + assert(BottomUpPathCount != 0); + return TopDownPathCount * BottomUpPathCount; + } + + // Specialized CFG utilities. 
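+    // These iterate over the precomputed Preds/Succs lists above rather than
+    // the raw LLVM CFG, so ignored edges are already filtered out.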
+ typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator; + edge_iterator pred_begin() { return Preds.begin(); } + edge_iterator pred_end() { return Preds.end(); } + edge_iterator succ_begin() { return Succs.begin(); } + edge_iterator succ_end() { return Succs.end(); } + + void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } + void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } + + bool isExit() const { return Succs.empty(); } + }; +} + +void BBState::InitFromPred(const BBState &Other) { + PerPtrTopDown = Other.PerPtrTopDown; + TopDownPathCount = Other.TopDownPathCount; +} + +void BBState::InitFromSucc(const BBState &Other) { + PerPtrBottomUp = Other.PerPtrBottomUp; + BottomUpPathCount = Other.BottomUpPathCount; +} + +/// The top-down traversal uses this to merge information about predecessors to +/// form the initial state for a new block. +void BBState::MergePred(const BBState &Other) { + // Other.TopDownPathCount can be 0, in which case it is either dead or a + // loop backedge. Loop backedges are special. + TopDownPathCount += Other.TopDownPathCount; + + // Check for overflow. If we have overflow, fall back to conservative + // behavior. + if (TopDownPathCount < Other.TopDownPathCount) { + clearTopDownPointers(); + return; + } + + // For each entry in the other set, if our set has an entry with the same key, + // merge the entries. Otherwise, copy the entry and merge it with an empty + // entry. + for (ptr_const_iterator MI = Other.top_down_ptr_begin(), + ME = Other.top_down_ptr_end(); MI != ME; ++MI) { + std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); + Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + /*TopDown=*/true); + } + + // For each entry in our set, if the other set doesn't have an entry with the + // same key, force it to merge with an empty entry. + for (ptr_iterator MI = top_down_ptr_begin(), + ME = top_down_ptr_end(); MI != ME; ++MI) + if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) + MI->second.Merge(PtrState(), /*TopDown=*/true); +} + +/// The bottom-up traversal uses this to merge information about successors to +/// form the initial state for a new block. +void BBState::MergeSucc(const BBState &Other) { + // Other.BottomUpPathCount can be 0, in which case it is either dead or a + // loop backedge. Loop backedges are special. + BottomUpPathCount += Other.BottomUpPathCount; + + // Check for overflow. If we have overflow, fall back to conservative + // behavior. + if (BottomUpPathCount < Other.BottomUpPathCount) { + clearBottomUpPointers(); + return; + } + + // For each entry in the other set, if our set has an entry with the + // same key, merge the entries. Otherwise, copy the entry and merge + // it with an empty entry. + for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), + ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { + std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); + Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, + /*TopDown=*/false); + } + + // For each entry in our set, if the other set doesn't have an entry + // with the same key, force it to merge with an empty entry. + for (ptr_iterator MI = bottom_up_ptr_begin(), + ME = bottom_up_ptr_end(); MI != ME; ++MI) + if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) + MI->second.Merge(PtrState(), /*TopDown=*/false); +} + +namespace { + /// \brief The main ARC optimization pass. 
+  class ObjCARCOpt : public FunctionPass {
+    bool Changed;
+    ProvenanceAnalysis PA;
+
+    /// A flag indicating whether this optimization pass should run.
+    bool Run;
+
+    /// Declarations for ObjC runtime functions, for use in creating calls to
+    /// them. These are initialized lazily to avoid cluttering up the Module
+    /// with unused declarations.
+
+    /// Declaration for ObjC runtime function
+    /// objc_retainAutoreleasedReturnValue.
+    Constant *RetainRVCallee;
+    /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
+    Constant *AutoreleaseRVCallee;
+    /// Declaration for ObjC runtime function objc_release.
+    Constant *ReleaseCallee;
+    /// Declaration for ObjC runtime function objc_retain.
+    Constant *RetainCallee;
+    /// Declaration for ObjC runtime function objc_retainBlock.
+    Constant *RetainBlockCallee;
+    /// Declaration for ObjC runtime function objc_autorelease.
+    Constant *AutoreleaseCallee;
+
+    /// Flags which determine whether each of the interesting runtime functions
+    /// is in fact used in the current function.
+    unsigned UsedInThisFunction;
+
+    /// The Metadata Kind for clang.imprecise_release metadata.
+    unsigned ImpreciseReleaseMDKind;
+
+    /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+    unsigned CopyOnEscapeMDKind;
+
+    /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+    unsigned NoObjCARCExceptionsMDKind;
+
+    Constant *getRetainRVCallee(Module *M);
+    Constant *getAutoreleaseRVCallee(Module *M);
+    Constant *getReleaseCallee(Module *M);
+    Constant *getRetainCallee(Module *M);
+    Constant *getRetainBlockCallee(Module *M);
+    Constant *getAutoreleaseCallee(Module *M);
+
+    bool IsRetainBlockOptimizable(const Instruction *Inst);
+
+    void OptimizeRetainCall(Function &F, Instruction *Retain);
+    bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+    void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+                                   InstructionClass &Class);
+    void OptimizeIndividualCalls(Function &F);
+
+    void CheckForCFGHazards(const BasicBlock *BB,
+                            DenseMap<const BasicBlock *, BBState> &BBStates,
+                            BBState &MyStates) const;
+    bool VisitInstructionBottomUp(Instruction *Inst,
+                                  BasicBlock *BB,
+                                  MapVector<Value *, RRInfo> &Retains,
+                                  BBState &MyStates);
+    bool VisitBottomUp(BasicBlock *BB,
+                       DenseMap<const BasicBlock *, BBState> &BBStates,
+                       MapVector<Value *, RRInfo> &Retains);
+    bool VisitInstructionTopDown(Instruction *Inst,
+                                 DenseMap<Value *, RRInfo> &Releases,
+                                 BBState &MyStates);
+    bool VisitTopDown(BasicBlock *BB,
+                      DenseMap<const BasicBlock *, BBState> &BBStates,
+                      DenseMap<Value *, RRInfo> &Releases);
+    bool Visit(Function &F,
+               DenseMap<const BasicBlock *, BBState> &BBStates,
+               MapVector<Value *, RRInfo> &Retains,
+               DenseMap<Value *, RRInfo> &Releases);
+
+    void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+                   MapVector<Value *, RRInfo> &Retains,
+                   DenseMap<Value *, RRInfo> &Releases,
+                   SmallVectorImpl<Instruction *> &DeadInsts,
+                   Module *M);
+
+    bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
+                               MapVector<Value *, RRInfo> &Retains,
+                               DenseMap<Value *, RRInfo> &Releases,
+                               Module *M,
+                               SmallVector<Instruction *, 4> &NewRetains,
+                               SmallVector<Instruction *, 4> &NewReleases,
+                               SmallVector<Instruction *, 8> &DeadInsts,
+                               RRInfo &RetainsToMove,
+                               RRInfo &ReleasesToMove,
+                               Value *Arg,
+                               bool KnownSafe,
+                               bool &AnyPairsCompletelyEliminated);
+
+    bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+                              MapVector<Value *, RRInfo> &Retains,
+                              DenseMap<Value *,
RRInfo> &Releases, + Module *M); + + void OptimizeWeakCalls(Function &F); + + bool OptimizeSequences(Function &F); + + void OptimizeReturns(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual void releaseMemory(); + + public: + static char ID; + ObjCARCOpt() : FunctionPass(ID) { + initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); + } + }; +} + +char ObjCARCOpt::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCOpt, + "objc-arc", "ObjC ARC optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) +INITIALIZE_PASS_END(ObjCARCOpt, + "objc-arc", "ObjC ARC optimization", false, false) + +Pass *llvm::createObjCARCOptPass() { + return new ObjCARCOpt(); +} + +void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<ObjCARCAliasAnalysis>(); + AU.addRequired<AliasAnalysis>(); + // ARC optimization doesn't currently split critical edges. + AU.setPreservesCFG(); +} + +bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { + // Without the magic metadata tag, we have to assume this might be an + // objc_retainBlock call inserted to convert a block pointer to an id, + // in which case it really is needed. + if (!Inst->getMetadata(CopyOnEscapeMDKind)) + return false; + + // If the pointer "escapes" (not including being used in a call), + // the copy may be needed. + if (DoesRetainableObjPtrEscape(Inst)) + return false; + + // Otherwise, it's not needed. + return true; +} + +Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { + if (!RetainRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainRVCallee = + M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, + Attribute); + } + return RetainRVCallee; +} + +Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { + if (!AutoreleaseRVCallee) { + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + AutoreleaseRVCallee = + M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, + Attribute); + } + return AutoreleaseRVCallee; +} + +Constant *ObjCARCOpt::getReleaseCallee(Module *M) { + if (!ReleaseCallee) { + LLVMContext &C = M->getContext(); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + ReleaseCallee = + M->getOrInsertFunction( + "objc_release", + FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), + Attribute); + } + return ReleaseCallee; +} + +Constant *ObjCARCOpt::getRetainCallee(Module *M) { + if (!RetainCallee) { + LLVMContext &C = M->getContext(); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttributeSet Attribute = + AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); + RetainCallee = + M->getOrInsertFunction( + "objc_retain", + FunctionType::get(Params[0], Params, /*isVarArg=*/false), + 
Attribute);
+  }
+  return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
+  if (!RetainBlockCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    // objc_retainBlock is not nounwind because it calls user copy constructors
+    // which could theoretically throw.
+    RetainBlockCallee =
+      M->getOrInsertFunction(
+        "objc_retainBlock",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        AttributeSet());
+  }
+  return RetainBlockCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+  if (!AutoreleaseCallee) {
+    LLVMContext &C = M->getContext();
+    Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
+    AttributeSet Attribute =
+      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+                                  Attribute::NoUnwind);
+    AutoreleaseCallee =
+      M->getOrInsertFunction(
+        "objc_autorelease",
+        FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+        Attribute);
+  }
+  return AutoreleaseCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  ImmutableCallSite CS(GetObjCArg(Retain));
+  const Instruction *Call = CS.getInstruction();
+  if (!Call) return;
+  if (Call->getParent() != Retain->getParent()) return;
+
+  // Check that the call is next to the retain.
+  BasicBlock::const_iterator I = Call;
+  ++I;
+  while (isNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return;
+
+  // Turn it into an objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
+                  "objc_retain => objc_retainAutoreleasedReturnValue"
+                  " since the operand is a return value.\n"
+                  " Old: "
+               << *Retain << "\n");
+
+  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+  DEBUG(dbgs() << " New: "
+               << *Retain << "\n");
+}
+
+/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
+/// not a return value. Or, if it can be paired with an
+/// objc_autoreleaseReturnValue, delete the pair and return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+  // Check for the argument being from an immediately preceding call or invoke.
+  const Value *Arg = GetObjCArg(RetainRV);
+  ImmutableCallSite CS(Arg);
+  if (const Instruction *Call = CS.getInstruction()) {
+    if (Call->getParent() == RetainRV->getParent()) {
+      BasicBlock::const_iterator I = Call;
+      ++I;
+      while (isNoopInstruction(I)) ++I;
+      if (&*I == RetainRV)
+        return false;
+    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+      BasicBlock *RetainRVParent = RetainRV->getParent();
+      if (II->getNormalDest() == RetainRVParent) {
+        BasicBlock::const_iterator I = RetainRVParent->begin();
+        while (isNoopInstruction(I)) ++I;
+        if (&*I == RetainRV)
+          return false;
+      }
+    }
+  }
+
+  // Check for being preceded by an objc_autoreleaseReturnValue on the same
+  // pointer. In this case, we can delete the pair.
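+  // For example (hypothetical IR), in
+  //   %2 = tail call i8* @objc_autoreleaseReturnValue(i8* %1)
+  //   %3 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %2)
+  // the two calls cancel out and both can be erased.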
+  BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+  if (I != Begin) {
+    do --I; while (I != Begin && isNoopInstruction(I));
+    if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+        GetObjCArg(I) == Arg) {
+      Changed = true;
+      ++NumPeeps;
+
+      DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
+                   << " Erasing " << *RetainRV
+                   << "\n");
+
+      EraseInstruction(I);
+      EraseInstruction(RetainRV);
+      return true;
+    }
+  }
+
+  // Turn it into a plain objc_retain.
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
+                  "objc_retainAutoreleasedReturnValue => "
+                  "objc_retain since the operand is not a return value.\n"
+                  " Old: "
+               << *RetainRV << "\n");
+
+  cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+
+  DEBUG(dbgs() << " New: "
+               << *RetainRV << "\n");
+
+  return false;
+}
+
+/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
+/// used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
+                                      InstructionClass &Class) {
+  // Check for a return of the pointer value.
+  const Value *Ptr = GetObjCArg(AutoreleaseRV);
+  SmallVector<const Value *, 2> Users;
+  Users.push_back(Ptr);
+  do {
+    Ptr = Users.pop_back_val();
+    for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+         UI != UE; ++UI) {
+      const User *I = *UI;
+      if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+        return;
+      if (isa<BitCastInst>(I))
+        Users.push_back(I);
+    }
+  } while (!Users.empty());
+
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
+                  "objc_autoreleaseReturnValue => "
+                  "objc_autorelease since its operand is not used as a return "
+                  "value.\n"
+                  " Old: "
+               << *AutoreleaseRV << "\n");
+
+  CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
+  AutoreleaseRVCI->
+    setCalledFunction(getAutoreleaseCallee(F.getParent()));
+  AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
+  Class = IC_Autorelease;
+
+  DEBUG(dbgs() << " New: "
+               << *AutoreleaseRV << "\n");
+}
+
+/// Visit each call, one at a time, and make simplifications without doing any
+/// additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  // Reset all the flags in preparation for recomputing them.
+  UsedInThisFunction = 0;
+
+  // Visit all objc_* calls in F.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+
+    DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
+          << Class << "; " << *Inst << "\n");
+
+    switch (Class) {
+    default: break;
+
+    // Delete no-op casts. These function calls have special semantics, but
+    // the semantics are entirely implemented via lowering in the front-end,
+    // so by the time they reach the optimizer, they are just no-op calls
+    // which return their argument.
+    //
+    // There are gray areas here, as the ability to cast reference-counted
+    // pointers to raw void* and back allows code to break ARC assumptions,
+    // however these are currently considered to be unimportant.
+    case IC_NoopCast:
+      Changed = true;
+      ++NumNoops;
+      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
+                      " " << *Inst << "\n");
+      EraseInstruction(Inst);
+      continue;
+
+    // If the pointer-to-weak-pointer is null, it's undefined behavior.
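+    // For example (hypothetical IR), "call i8* @objc_loadWeak(i8** null)"
+    // is replaced below by a store of undef through the null pointer and an
+    // undef result, making the undefined behavior explicit.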
+ case IC_StoreWeak: + case IC_LoadWeak: + case IC_LoadWeakRetained: + case IC_InitWeak: + case IC_DestroyWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0))) { + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + llvm::Value *NewValue = UndefValue::get(CI->getType()); + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " + "pointer-to-weak-pointer is undefined behavior.\n" + " Old = " << *CI << + "\n New = " << + *NewValue << "\n"); + CI->replaceAllUsesWith(NewValue); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_CopyWeak: + case IC_MoveWeak: { + CallInst *CI = cast<CallInst>(Inst); + if (isNullOrUndef(CI->getArgOperand(0)) || + isNullOrUndef(CI->getArgOperand(1))) { + Changed = true; + Type *Ty = CI->getArgOperand(0)->getType(); + new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), + Constant::getNullValue(Ty), + CI); + + llvm::Value *NewValue = UndefValue::get(CI->getType()); + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " + "pointer-to-weak-pointer is undefined behavior.\n" + " Old = " << *CI << + "\n New = " << + *NewValue << "\n"); + + CI->replaceAllUsesWith(NewValue); + CI->eraseFromParent(); + continue; + } + break; + } + case IC_Retain: + OptimizeRetainCall(F, Inst); + break; + case IC_RetainRV: + if (OptimizeRetainRVCall(F, Inst)) + continue; + break; + case IC_AutoreleaseRV: + OptimizeAutoreleaseRVCall(F, Inst, Class); + break; + } + + // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. + if (IsAutorelease(Class) && Inst->use_empty()) { + CallInst *Call = cast<CallInst>(Inst); + const Value *Arg = Call->getArgOperand(0); + Arg = FindSingleUseIdentifiedObject(Arg); + if (Arg) { + Changed = true; + ++NumAutoreleases; + + // Create the declaration lazily. + LLVMContext &C = Inst->getContext(); + CallInst *NewCall = + CallInst::Create(getReleaseCallee(F.getParent()), + Call->getArgOperand(0), "", Call); + NewCall->setMetadata(ImpreciseReleaseMDKind, + MDNode::get(C, ArrayRef<Value *>())); + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " + "objc_autorelease(x) with objc_release(x) since x is " + "otherwise unused.\n" + " Old: " << *Call << + "\n New: " << + *NewCall << "\n"); + + EraseInstruction(Call); + Inst = NewCall; + Class = IC_Release; + } + } + + // For functions which can never be passed stack arguments, add + // a tail keyword. + if (IsAlwaysTail(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" + " to function since it can never be passed stack args: " << *Inst << + "\n"); + cast<CallInst>(Inst)->setTailCall(); + } + + // Ensure that functions that can never have a "tail" keyword due to the + // semantics of ARC truly do not do so. + if (IsNeverTail(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail " + "keyword from function: " << *Inst << + "\n"); + cast<CallInst>(Inst)->setTailCall(false); + } + + // Set nounwind as needed. + if (IsNoThrow(Class)) { + Changed = true; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" + " class. Setting nounwind on: " << *Inst << "\n"); + cast<CallInst>(Inst)->setDoesNotThrow(); + } + + if (!IsNoopOnNull(Class)) { + UsedInThisFunction |= 1 << Class; + continue; + } + + const Value *Arg = GetObjCArg(Inst); + + // ARC calls with null are no-ops. Delete them. 
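+    // For example (hypothetical IR), "call void @objc_release(i8* null)"
+    // has no effect and can simply be erased.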
+ if (isNullOrUndef(Arg)) { + Changed = true; + ++NumNoops; + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " + " null are no-ops. Erasing: " << *Inst << "\n"); + EraseInstruction(Inst); + continue; + } + + // Keep track of which of retain, release, autorelease, and retain_block + // are actually present in this function. + UsedInThisFunction |= 1 << Class; + + // If Arg is a PHI, and one or more incoming values to the + // PHI are null, and the call is control-equivalent to the PHI, and there + // are no relevant side effects between the PHI and the call, the call + // could be pushed up to just those paths with non-null incoming values. + // For now, don't bother splitting critical edges for this. + SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; + Worklist.push_back(std::make_pair(Inst, Arg)); + do { + std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); + Inst = Pair.first; + Arg = Pair.second; + + const PHINode *PN = dyn_cast<PHINode>(Arg); + if (!PN) continue; + + // Determine if the PHI has any null operands, or any incoming + // critical edges. + bool HasNull = false; + bool HasCriticalEdges = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (isNullOrUndef(Incoming)) + HasNull = true; + else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) + .getNumSuccessors() != 1) { + HasCriticalEdges = true; + break; + } + } + // If we have null operands and no critical edges, optimize. + if (!HasCriticalEdges && HasNull) { + SmallPtrSet<Instruction *, 4> DependingInstructions; + SmallPtrSet<const BasicBlock *, 4> Visited; + + // Check that there is nothing that cares about the reference + // count between the call and the phi. + switch (Class) { + case IC_Retain: + case IC_RetainBlock: + // These can always be moved up. + break; + case IC_Release: + // These can't be moved across things that care about the retain + // count. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_Autorelease: + // These can't be moved across autorelease pool scope boundaries. + FindDependencies(AutoreleasePoolBoundary, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_RetainRV: + case IC_AutoreleaseRV: + // Don't move these; the RV optimization depends on the autoreleaseRV + // being tail called, and the retainRV being immediately after a call + // (which might still happen if we get lucky with codegen layout, but + // it's not worth taking the chance). + continue; + default: + llvm_unreachable("Invalid dependence flavor"); + } + + if (DependingInstructions.size() == 1 && + *DependingInstructions.begin() == PN) { + Changed = true; + ++NumPartialNoops; + // Clone the call into each predecessor that has a non-null value. 
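+        // For example (hypothetical IR), given
+        //   %p = phi i8* [ null, %entry ], [ %x, %nonnull ]
+        //   call void @objc_release(i8* %p)
+        // the call is cloned into %nonnull (the only non-null predecessor)
+        // and the original call is erased.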
+ CallInst *CInst = cast<CallInst>(Inst); + Type *ParamTy = CInst->getArgOperand(0)->getType(); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = + StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); + if (!isNullOrUndef(Incoming)) { + CallInst *Clone = cast<CallInst>(CInst->clone()); + Value *Op = PN->getIncomingValue(i); + Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); + if (Op->getType() != ParamTy) + Op = new BitCastInst(Op, ParamTy, "", InsertPos); + Clone->setArgOperand(0, Op); + Clone->insertBefore(InsertPos); + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning " + << *CInst << "\n" + " And inserting " + "clone at " << *InsertPos << "\n"); + Worklist.push_back(std::make_pair(Clone, Incoming)); + } + } + // Erase the original call. + DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); + EraseInstruction(CInst); + continue; + } + } + } while (!Worklist.empty()); + } + DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n"); +} + +/// Check for critical edges, loop boundaries, irreducible control flow, or +/// other CFG structures where moving code across the edge would result in it +/// being executed more. +void +ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + BBState &MyStates) const { + // If any top-down local-use or possible-dec has a succ which is earlier in + // the sequence, forget it. + for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) + switch (I->second.GetSeq()) { + default: break; + case S_Use: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap<const BasicBlock *, BBState>::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; + switch (SuccSSeq) { + case S_None: + case S_CanRelease: { + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { + S.ClearSequenceProgress(); + break; + } + continue; + } + case S_Use: + SomeSuccHasSame = true; + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) + AllSuccsHaveSame = false; + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + // If the state at the other end of any of the successor edges + // matches the current state, require all edges to match. This + // guards against loops in the middle of a sequence. + if (SomeSuccHasSame && !AllSuccsHaveSame) + S.ClearSequenceProgress(); + break; + } + case S_CanRelease: { + const Value *Arg = I->first; + const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + bool SomeSuccHasSame = false; + bool AllSuccsHaveSame = true; + PtrState &S = I->second; + succ_const_iterator SI(TI), SE(TI, false); + + for (; SI != SE; ++SI) { + Sequence SuccSSeq = S_None; + bool SuccSRRIKnownSafe = false; + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. 
+        DenseMap<const BasicBlock *, BBState>::iterator BBI =
+          BBStates.find(*SI);
+        assert(BBI != BBStates.end());
+        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+        SuccSSeq = SuccS.GetSeq();
+        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+        switch (SuccSSeq) {
+        case S_None: {
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+            S.ClearSequenceProgress();
+            break;
+          }
+          continue;
+        }
+        case S_CanRelease:
+          SomeSuccHasSame = true;
+          break;
+        case S_Stop:
+        case S_Release:
+        case S_MovableRelease:
+        case S_Use:
+          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+            AllSuccsHaveSame = false;
+          break;
+        case S_Retain:
+          llvm_unreachable("bottom-up pointer in retain state!");
+        }
+      }
+      // If the state at the other end of any of the successor edges
+      // matches the current state, require all edges to match. This
+      // guards against loops in the middle of a sequence.
+      if (SomeSuccHasSame && !AllSuccsHaveSame)
+        S.ClearSequenceProgress();
+      break;
+    }
+    }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+                                     BasicBlock *BB,
+                                     MapVector<Value *, RRInfo> &Retains,
+                                     BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+    // If we see two releases in a row on the same pointer, make a note, and
+    // we'll circle back to revisit it after we've hopefully eliminated the
+    // second release, which may allow us to eliminate the first release too.
+    // Theoretically we could implement removal of nested retain+release
+    // pairs by making PtrState hold a stack of states, but this is
+    // simple and avoids adding overhead for the non-nested case.
+    if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
+      DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
+                      "releases (i.e. a release pair)\n");
+      NestingDetected = true;
+    }
+
+    MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+    S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release);
+    S.RRI.ReleaseMetadata = ReleaseMetadata;
+    S.RRI.KnownSafe = S.IsKnownIncremented();
+    S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+    S.RRI.Calls.insert(Inst);
+
+    S.SetKnownPositiveRefCount();
+    break;
+  }
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrBottomUpState(Arg);
+    S.SetKnownPositiveRefCount();
+
+    switch (S.GetSeq()) {
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+    case S_Use:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_CanRelease:
+      // Don't do retain+release tracking for IC_RetainRV, because it's
+      // better to let it remain as the first instruction after a call.
+      if (Class != IC_RetainRV) {
+        S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+        Retains[Inst] = S.RRI;
+      }
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+    return NestingDetected;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearBottomUpPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
+
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+       ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Use:
+        S.SetSeq(S_CanRelease);
+        continue;
+      case S_CanRelease:
+      case S_Release:
+      case S_MovableRelease:
+      case S_Stop:
+      case S_None:
+        break;
+      case S_Retain:
+        llvm_unreachable("bottom-up pointer in retain state!");
+      }
+    }
+
+    // Check for possible direct uses.
+    switch (Seq) {
+    case S_Release:
+    case S_MovableRelease:
+      if (CanUse(Inst, Ptr, PA, Class)) {
+        assert(S.RRI.ReverseInsertPts.empty());
+        // If this is an invoke instruction, we're scanning it as part of
+        // one of its successor blocks, since we can't insert code after it
+        // in its own block, and we don't want to split critical edges.
+        if (isa<InvokeInst>(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+        S.SetSeq(S_Use);
+      } else if (Seq == S_Release &&
+                 (Class == IC_User || Class == IC_CallOrUser)) {
+        // Non-movable releases depend on any possible objc pointer use.
+        S.SetSeq(S_Stop);
+        assert(S.RRI.ReverseInsertPts.empty());
+        // As above; handle invoke specially.
+        if (isa<InvokeInst>(Inst))
+          S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+        else
+          S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+      }
+      break;
+    case S_Stop:
+      if (CanUse(Inst, Ptr, PA, Class))
+        S.SetSeq(S_Use);
+      break;
+    case S_CanRelease:
+    case S_Use:
+    case S_None:
+      break;
+    case S_Retain:
+      llvm_unreachable("bottom-up pointer in retain state!");
+    }
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+                          DenseMap<const BasicBlock *, BBState> &BBStates,
+                          MapVector<Value *, RRInfo> &Retains) {
+  bool NestingDetected = false;
+  BBState &MyStates = BBStates[BB];
+
+  // Merge the states from each successor to compute the initial state
+  // for the current block.
+  BBState::edge_iterator SI(MyStates.succ_begin()),
+                         SE(MyStates.succ_end());
+  if (SI != SE) {
+    const BasicBlock *Succ = *SI;
+    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+    assert(I != BBStates.end());
+    MyStates.InitFromSucc(I->second);
+    ++SI;
+    for (; SI != SE; ++SI) {
+      Succ = *SI;
+      I = BBStates.find(Succ);
+      assert(I != BBStates.end());
+      MyStates.MergeSucc(I->second);
+    }
+  }
+
+  // Visit all the instructions, bottom-up.
+  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+    Instruction *Inst = llvm::prior(I);
+
+    // Invoke instructions are visited as part of their successors (below).
+    if (isa<InvokeInst>(Inst))
+      continue;
+
+    DEBUG(dbgs() << "ObjCARCOpt::VisitBottomUp: Visiting " << *Inst << "\n");
+
+    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+  }
+
+  // If there's a predecessor with an invoke, visit the invoke as if it were
+  // part of this block, since we can't insert code after an invoke in its own
+  // block, and we don't want to split critical edges.
+  for (BBState::edge_iterator PI(MyStates.pred_begin()),
+       PE(MyStates.pred_end()); PI != PE; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
+      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
+  }
+
+  return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+                                    DenseMap<Value *, RRInfo> &Releases,
+                                    BBState &MyStates) {
+  bool NestingDetected = false;
+  InstructionClass Class = GetInstructionClass(Inst);
+  const Value *Arg = 0;
+
+  switch (Class) {
+  case IC_RetainBlock:
+    // An objc_retainBlock call with just a use may need to be kept,
+    // because it may be copying a block from the stack to the heap.
+    if (!IsRetainBlockOptimizable(Inst))
+      break;
+    // FALLTHROUGH
+  case IC_Retain:
+  case IC_RetainRV: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+    // Don't do retain+release tracking for IC_RetainRV, because it's
+    // better to let it remain as the first instruction after a call.
+    if (Class != IC_RetainRV) {
+      // If we see two retains in a row on the same pointer, make a note, and
+      // we'll circle back to revisit it after we've hopefully eliminated the
+      // second retain, which may allow us to eliminate the first retain too.
+      // Theoretically we could implement removal of nested retain+release
+      // pairs by making PtrState hold a stack of states, but this is
+      // simple and avoids adding overhead for the non-nested case.
+      if (S.GetSeq() == S_Retain)
+        NestingDetected = true;
+
+      S.ResetSequenceProgress(S_Retain);
+      S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+      S.RRI.KnownSafe = S.IsKnownIncremented();
+      S.RRI.Calls.insert(Inst);
+    }
+
+    S.SetKnownPositiveRefCount();
+
+    // A retain can be a potential use; proceed to the generic checking
+    // code below.
+    break;
+  }
+  case IC_Release: {
+    Arg = GetObjCArg(Inst);
+
+    PtrState &S = MyStates.getPtrTopDownState(Arg);
+    S.ClearRefCount();
+
+    switch (S.GetSeq()) {
+    case S_Retain:
+    case S_CanRelease:
+      S.RRI.ReverseInsertPts.clear();
+      // FALL THROUGH
+    case S_Use:
+      S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+      S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+      Releases[Inst] = S.RRI;
+      S.ClearSequenceProgress();
+      break;
+    case S_None:
+      break;
+    case S_Stop:
+    case S_Release:
+    case S_MovableRelease:
+      llvm_unreachable("top-down pointer in release state!");
+    }
+    break;
+  }
+  case IC_AutoreleasepoolPop:
+    // Conservatively, clear MyStates for all known pointers.
+    MyStates.clearTopDownPointers();
+    return NestingDetected;
+  case IC_AutoreleasepoolPush:
+  case IC_None:
+    // These are irrelevant.
+    return NestingDetected;
+  default:
+    break;
+  }
+
+  // Consider any other possible effects of this instruction on each
+  // pointer being tracked.
+  for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+       ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+    const Value *Ptr = MI->first;
+    if (Ptr == Arg)
+      continue; // Handled above.
+    PtrState &S = MI->second;
+    Sequence Seq = S.GetSeq();
+
+    // Check for possible releases.
+    if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      S.ClearRefCount();
+      switch (Seq) {
+      case S_Retain:
+        S.SetSeq(S_CanRelease);
+        assert(S.RRI.ReverseInsertPts.empty());
+        S.RRI.ReverseInsertPts.insert(Inst);
+
+        // One call can't cause a transition from S_Retain to S_CanRelease
+        // and S_CanRelease to S_Use. If we've made the first transition,
+        // we're done.
+ continue; + case S_Use: + case S_CanRelease: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Retain: + case S_Use: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } + } + + return NestingDetected; +} + +bool +ObjCARCOpt::VisitTopDown(BasicBlock *BB, + DenseMap<const BasicBlock *, BBState> &BBStates, + DenseMap<Value *, RRInfo> &Releases) { + bool NestingDetected = false; + BBState &MyStates = BBStates[BB]; + + // Merge the states from each predecessor to compute the initial state + // for the current block. + BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); + if (PI != PE) { + const BasicBlock *Pred = *PI; + DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); + assert(I != BBStates.end()); + MyStates.InitFromPred(I->second); + ++PI; + for (; PI != PE; ++PI) { + Pred = *PI; + I = BBStates.find(Pred); + assert(I != BBStates.end()); + MyStates.MergePred(I->second); + } + } + + // Visit all the instructions, top-down. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + Instruction *Inst = I; + + DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n"); + + NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); + } + + CheckForCFGHazards(BB, BBStates, MyStates); + return NestingDetected; +} + +static void +ComputePostOrders(Function &F, + SmallVectorImpl<BasicBlock *> &PostOrder, + SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, + unsigned NoObjCARCExceptionsMDKind, + DenseMap<const BasicBlock *, BBState> &BBStates) { + /// The visited set, for doing DFS walks. + SmallPtrSet<BasicBlock *, 16> Visited; + + // Do DFS, computing the PostOrder. + SmallPtrSet<BasicBlock *, 16> OnStack; + SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; + + // Functions always have exactly one entry block, and we don't have + // any other block that we treat like an entry block. + BasicBlock *EntryBB = &F.getEntryBlock(); + BBState &MyStates = BBStates[EntryBB]; + MyStates.SetAsEntry(); + TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back()); + SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); + Visited.insert(EntryBB); + OnStack.insert(EntryBB); + do { + dfs_next_succ: + BasicBlock *CurrBB = SuccStack.back().first; + TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back()); + succ_iterator SE(TI, false); + + while (SuccStack.back().second != SE) { + BasicBlock *SuccBB = *SuccStack.back().second++; + if (Visited.insert(SuccBB)) { + TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back()); + SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); + BBStates[CurrBB].addSucc(SuccBB); + BBState &SuccStates = BBStates[SuccBB]; + SuccStates.addPred(CurrBB); + OnStack.insert(SuccBB); + goto dfs_next_succ; + } + + if (!OnStack.count(SuccBB)) { + BBStates[CurrBB].addSucc(SuccBB); + BBStates[SuccBB].addPred(CurrBB); + } + } + OnStack.erase(CurrBB); + PostOrder.push_back(CurrBB); + SuccStack.pop_back(); + } while (!SuccStack.empty()); + + Visited.clear(); + + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. 
+  // Functions may have many exits, and there are also blocks which we treat
+  // as exits due to ignored edges.
+  SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *ExitBB = I;
+    BBState &MyStates = BBStates[ExitBB];
+    if (!MyStates.isExit())
+      continue;
+
+    MyStates.SetAsExit();
+
+    PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin()));
+    Visited.insert(ExitBB);
+    while (!PredStack.empty()) {
+    reverse_dfs_next_succ:
+      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
+      while (PredStack.back().second != PE) {
+        BasicBlock *BB = *PredStack.back().second++;
+        if (Visited.insert(BB)) {
+          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
+          goto reverse_dfs_next_succ;
+        }
+      }
+      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+    }
+  }
+}
+
+// Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+                  DenseMap<const BasicBlock *, BBState> &BBStates,
+                  MapVector<Value *, RRInfo> &Retains,
+                  DenseMap<Value *, RRInfo> &Releases) {
+
+  // Use reverse-postorder traversals, because we magically know that loops
+  // will be well behaved, i.e. they won't repeatedly call retain on a single
+  // pointer without doing a release. We can't use the ReversePostOrderTraversal
+  // class here because we want the reverse-CFG postorder to consider each
+  // function exit point, and we want to ignore selected cycle edges.
+  SmallVector<BasicBlock *, 16> PostOrder;
+  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
+                    NoObjCARCExceptionsMDKind,
+                    BBStates);
+
+  // Use reverse-postorder on the reverse CFG for bottom-up.
+  bool BottomUpNestingDetected = false;
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+       ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+       I != E; ++I)
+    BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
+
+  // Use reverse-postorder for top-down.
+  bool TopDownNestingDetected = false;
+  for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+       PostOrder.rbegin(), E = PostOrder.rend();
+       I != E; ++I)
+    TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
+
+  return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+                           RRInfo &RetainsToMove,
+                           RRInfo &ReleasesToMove,
+                           MapVector<Value *, RRInfo> &Retains,
+                           DenseMap<Value *, RRInfo> &Releases,
+                           SmallVectorImpl<Instruction *> &DeadInsts,
+                           Module *M) {
+  Type *ArgTy = Arg->getType();
+  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+
+  // Insert the new retain and release calls.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = ReleasesToMove.ReverseInsertPts.begin(),
+       PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call =
+      CallInst::Create(RetainsToMove.IsRetainBlock ?
+                       getRetainBlockCallee(M) : getRetainCallee(M),
+                       MyArg, "", InsertPt);
+    Call->setDoesNotThrow();
+    if (RetainsToMove.IsRetainBlock)
+      Call->setMetadata(CopyOnEscapeMDKind,
+                        MDNode::get(M->getContext(), ArrayRef<Value *>()));
+    else
+      Call->setTailCall();
+
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       PI = RetainsToMove.ReverseInsertPts.begin(),
+       PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+    Instruction *InsertPt = *PI;
+    Value *MyArg = ArgTy == ParamTy ? Arg :
+                   new BitCastInst(Arg, ParamTy, "", InsertPt);
+    CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+                                      "", InsertPt);
+    // Attach a clang.imprecise_release metadata tag, if appropriate.
+    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+      Call->setMetadata(ImpreciseReleaseMDKind, M);
+    Call->setDoesNotThrow();
+    if (ReleasesToMove.IsTailCallRelease)
+      Call->setTailCall();
+
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
+                 << "\n"
+                    " At insertion point: " << *InsertPt
+                 << "\n");
+  }
+
+  // Delete the original retain and release calls.
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = RetainsToMove.Calls.begin(),
+       AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRetain = *AI;
+    Retains.blot(OrigRetain);
+    DeadInsts.push_back(OrigRetain);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
+          "\n");
+  }
+  for (SmallPtrSet<Instruction *, 2>::const_iterator
+       AI = ReleasesToMove.Calls.begin(),
+       AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+    Instruction *OrigRelease = *AI;
+    Releases.erase(OrigRelease);
+    DeadInsts.push_back(OrigRelease);
+    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
+          << "\n");
+  }
+}
+
+bool
+ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
+                                    &BBStates,
+                                  MapVector<Value *, RRInfo> &Retains,
+                                  DenseMap<Value *, RRInfo> &Releases,
+                                  Module *M,
+                                  SmallVector<Instruction *, 4> &NewRetains,
+                                  SmallVector<Instruction *, 4> &NewReleases,
+                                  SmallVector<Instruction *, 8> &DeadInsts,
+                                  RRInfo &RetainsToMove,
+                                  RRInfo &ReleasesToMove,
+                                  Value *Arg,
+                                  bool KnownSafe,
+                                  bool &AnyPairsCompletelyEliminated) {
+  // If a pair happens in a region where it is known that the reference count
+  // is already incremented, we can similarly ignore possible decrements.
+  bool KnownSafeTD = true, KnownSafeBU = true;
+
+  // Connect the dots between the top-down-collected RetainsToMove and
+  // bottom-up-collected ReleasesToMove to form sets of related calls.
+  // This is an iterative process so that we connect multiple releases
+  // to multiple retains if needed.
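+  // OldDelta and NewDelta measure the retain/release balance, weighted by the
+  // number of entry-to-exit paths through each call's block, for the original
+  // calls and for the proposed insertion points respectively; OldCount and
+  // NewCount are the corresponding weighted retain counts.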
+  unsigned OldDelta = 0;
+  unsigned NewDelta = 0;
+  unsigned OldCount = 0;
+  unsigned NewCount = 0;
+  bool FirstRelease = true;
+  bool FirstRetain = true;
+  for (;;) {
+    for (SmallVectorImpl<Instruction *>::const_iterator
+         NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+      Instruction *NewRetain = *NI;
+      MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+      assert(It != Retains.end());
+      const RRInfo &NewRetainRRI = It->second;
+      KnownSafeTD &= NewRetainRRI.KnownSafe;
+      for (SmallPtrSet<Instruction *, 2>::const_iterator
+           LI = NewRetainRRI.Calls.begin(),
+           LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewRetainRelease = *LI;
+        DenseMap<Value *, RRInfo>::const_iterator Jt =
+          Releases.find(NewRetainRelease);
+        if (Jt == Releases.end())
+          return false;
+        const RRInfo &NewRetainReleaseRRI = Jt->second;
+        assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+        if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+          OldDelta -=
+            BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+          // Merge the ReleaseMetadata and IsTailCallRelease values.
+          if (FirstRelease) {
+            ReleasesToMove.ReleaseMetadata =
+              NewRetainReleaseRRI.ReleaseMetadata;
+            ReleasesToMove.IsTailCallRelease =
+              NewRetainReleaseRRI.IsTailCallRelease;
+            FirstRelease = false;
+          } else {
+            if (ReleasesToMove.ReleaseMetadata !=
+                NewRetainReleaseRRI.ReleaseMetadata)
+              ReleasesToMove.ReleaseMetadata = 0;
+            if (ReleasesToMove.IsTailCallRelease !=
+                NewRetainReleaseRRI.IsTailCallRelease)
+              ReleasesToMove.IsTailCallRelease = false;
+          }
+
+          // Collect the optimal insertion points.
+          if (!KnownSafe)
+            for (SmallPtrSet<Instruction *, 2>::const_iterator
+                 RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+                 RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+                 RI != RE; ++RI) {
+              Instruction *RIP = *RI;
+              if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+                NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+            }
+          NewReleases.push_back(NewRetainRelease);
+        }
+      }
+    }
+    NewRetains.clear();
+    if (NewReleases.empty()) break;
+
+    // Back the other way.
+    for (SmallVectorImpl<Instruction *>::const_iterator
+         NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+      Instruction *NewRelease = *NI;
+      DenseMap<Value *, RRInfo>::const_iterator It =
+        Releases.find(NewRelease);
+      assert(It != Releases.end());
+      const RRInfo &NewReleaseRRI = It->second;
+      KnownSafeBU &= NewReleaseRRI.KnownSafe;
+      for (SmallPtrSet<Instruction *, 2>::const_iterator
+           LI = NewReleaseRRI.Calls.begin(),
+           LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+        Instruction *NewReleaseRetain = *LI;
+        MapVector<Value *, RRInfo>::const_iterator Jt =
+          Retains.find(NewReleaseRetain);
+        if (Jt == Retains.end())
+          return false;
+        const RRInfo &NewReleaseRetainRRI = Jt->second;
+        assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+        if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+          unsigned PathCount =
+            BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+          OldDelta += PathCount;
+          OldCount += PathCount;
+
+          // Merge the IsRetainBlock values.
+          if (FirstRetain) {
+            RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+            FirstRetain = false;
+          } else if (RetainsToMove.IsRetainBlock !=
+                     NewReleaseRetainRRI.IsRetainBlock)
+            // It's not possible to merge the sequences if one uses
+            // objc_retain and the other uses objc_retainBlock.
+            return false;
+
+          // Collect the optimal insertion points.
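+          // (Insertion points are only needed when the pairing is not known
+          // safe; the known-safe case clears them again below.)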
+ if (!KnownSafe) + for (SmallPtrSet<Instruction *, 2>::const_iterator + RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), + RE = NewReleaseRetainRRI.ReverseInsertPts.end(); + RI != RE; ++RI) { + Instruction *RIP = *RI; + if (RetainsToMove.ReverseInsertPts.insert(RIP)) { + PathCount = BBStates[RIP->getParent()].GetAllPathCount(); + NewDelta += PathCount; + NewCount += PathCount; + } + } + NewRetains.push_back(NewReleaseRetain); + } + } + } + NewReleases.clear(); + if (NewRetains.empty()) break; + } + + // If the pointer is known incremented or nested, we can safely delete the + // pair regardless of what's between them. + if (KnownSafeTD || KnownSafeBU) { + RetainsToMove.ReverseInsertPts.clear(); + ReleasesToMove.ReverseInsertPts.clear(); + NewCount = 0; + } else { + // Determine whether the new insertion points we computed preserve the + // balance of retain and release calls through the program. + // TODO: If the fully aggressive solution isn't valid, try to find a + // less aggressive solution which is. + if (NewDelta != 0) + return false; + } + + // Determine whether the original call points are balanced in the retain and + // release calls through the program. If not, conservatively don't touch + // them. + // TODO: It's theoretically possible to do code motion in this case, as + // long as the existing imbalances are maintained. + if (OldDelta != 0) + return false; + + Changed = true; + assert(OldCount != 0 && "Unreachable code?"); + NumRRs += OldCount - NewCount; + // Set to true if we completely removed any RR pairs. + AnyPairsCompletelyEliminated = NewCount == 0; + + // We can move calls! + return true; +} + +/// Identify pairings between the retains and releases, and delete and/or move +/// them. +bool +ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> + &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases, + Module *M) { + bool AnyPairsCompletelyEliminated = false; + RRInfo RetainsToMove; + RRInfo ReleasesToMove; + SmallVector<Instruction *, 4> NewRetains; + SmallVector<Instruction *, 4> NewReleases; + SmallVector<Instruction *, 8> DeadInsts; + + // Visit each retain. + for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), + E = Retains.end(); I != E; ++I) { + Value *V = I->first; + if (!V) continue; // blotted + + Instruction *Retain = cast<Instruction>(V); + + DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain + << "\n"); + + Value *Arg = GetObjCArg(Retain); + + // If the object being released is in static or stack storage, we know it's + // not being managed by ObjC reference counting, so we can delete pairs + // regardless of what possible decrements or uses lie between them. + bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); + + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>( + StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) + if (GV->isConstant()) + KnownSafe = true; + + // Connect the dots between the top-down-collected RetainsToMove and + // bottom-up-collected ReleasesToMove to form sets of related calls. 
+ NewRetains.push_back(Retain); + bool PerformMoveCalls = + ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains, + NewReleases, DeadInsts, RetainsToMove, + ReleasesToMove, Arg, KnownSafe, + AnyPairsCompletelyEliminated); + + if (PerformMoveCalls) { + // Ok, everything checks out and we're all set. Let's move/delete some + // code! + MoveCalls(Arg, RetainsToMove, ReleasesToMove, + Retains, Releases, DeadInsts, M); + } + + // Clean up state for next retain. + NewReleases.clear(); + NewRetains.clear(); + RetainsToMove.clear(); + ReleasesToMove.clear(); + } + + // Now that we're done moving everything, we can delete the newly dead + // instructions, as we no longer need them as insert points. + while (!DeadInsts.empty()) + EraseInstruction(DeadInsts.pop_back_val()); + + return AnyPairsCompletelyEliminated; +} + +/// Weak pointer optimizations. +void ObjCARCOpt::OptimizeWeakCalls(Function &F) { + // First, do memdep-style RLE and S2L optimizations. We can't use memdep + // itself because it uses AliasAnalysis and we need to do provenance + // queries instead. + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { + Instruction *Inst = &*I++; + + DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << + "\n"); + + InstructionClass Class = GetBasicInstructionClass(Inst); + if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) + continue; + + // Delete objc_loadWeak calls with no users. + if (Class == IC_LoadWeak && Inst->use_empty()) { + Inst->eraseFromParent(); + continue; + } + + // TODO: For now, just look for an earlier available version of this value + // within the same block. Theoretically, we could do memdep-style non-local + // analysis too, but that would want caching. A better approach would be to + // use the technique that EarlyCSE uses. + inst_iterator Current = llvm::prior(I); + BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + for (BasicBlock::iterator B = CurrentBB->begin(), + J = Current.getInstructionIterator(); + J != B; --J) { + Instruction *EarlierInst = &*llvm::prior(J); + InstructionClass EarlierClass = GetInstructionClass(EarlierInst); + switch (EarlierClass) { + case IC_LoadWeak: + case IC_LoadWeakRetained: { + // If this is loading from the same pointer, replace this load's value + // with that one. + CallInst *Call = cast<CallInst>(Inst); + CallInst *EarlierCall = cast<CallInst>(EarlierInst); + Value *Arg = Call->getArgOperand(0); + Value *EarlierArg = EarlierCall->getArgOperand(0); + switch (PA.getAA()->alias(Arg, EarlierArg)) { + case AliasAnalysis::MustAlias: + Changed = true; + // If the load has a builtin retain, insert a plain retain for it. + if (Class == IC_LoadWeakRetained) { + CallInst *CI = + CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, + "", Call); + CI->setTailCall(); + } + // Zap the fully redundant load. + Call->replaceAllUsesWith(EarlierCall); + Call->eraseFromParent(); + goto clobbered; + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + goto clobbered; + case AliasAnalysis::NoAlias: + break; + } + break; + } + case IC_StoreWeak: + case IC_InitWeak: { + // If this is storing to the same pointer and has the same size etc. + // replace this load's value with the stored value. 
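      // An illustrative sketch (hypothetical IR, not from this patch): the
      // replacement below forwards argument 1 of the earlier call, because
      // objc_storeWeak and objc_initWeak store and return their second
      // argument:
      //
      // \code
      //   %0 = call i8* @objc_storeWeak(i8** %loc, i8* %val)
      //   ...                            ; nothing clobbers %loc in between
      //   %1 = call i8* @objc_loadWeak(i8** %loc)
      //   ; uses of %1 can be rewritten to use %val
      // \endcode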
+        CallInst *Call = cast<CallInst>(Inst);
+        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+        Value *Arg = Call->getArgOperand(0);
+        Value *EarlierArg = EarlierCall->getArgOperand(0);
+        switch (PA.getAA()->alias(Arg, EarlierArg)) {
+        case AliasAnalysis::MustAlias:
+          Changed = true;
+          // If the load has a builtin retain, insert a plain retain for it.
+          if (Class == IC_LoadWeakRetained) {
+            CallInst *CI =
+              CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+                               "", Call);
+            CI->setTailCall();
+          }
+          // Zap the fully redundant load.
+          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+          Call->eraseFromParent();
+          goto clobbered;
+        case AliasAnalysis::MayAlias:
+        case AliasAnalysis::PartialAlias:
+          goto clobbered;
+        case AliasAnalysis::NoAlias:
+          break;
+        }
+        break;
+      }
+      case IC_MoveWeak:
+      case IC_CopyWeak:
+        // TODO: Grab the copied value.
+        goto clobbered;
+      case IC_AutoreleasepoolPush:
+      case IC_None:
+      case IC_User:
+        // Weak pointers are only modified through the weak entry points
+        // (and arbitrary calls, which could call the weak entry points).
+        break;
+      default:
+        // Anything else could modify the weak pointer.
+        goto clobbered;
+      }
+    }
+  clobbered:;
+  }
+
+  // Then, for each destroyWeak with an alloca operand, check to see if
+  // the alloca and all its users can be zapped.
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    InstructionClass Class = GetBasicInstructionClass(Inst);
+    if (Class != IC_DestroyWeak)
+      continue;
+
+    CallInst *Call = cast<CallInst>(Inst);
+    Value *Arg = Call->getArgOperand(0);
+    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ++UI) {
+        const Instruction *UserInst = cast<Instruction>(*UI);
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+        case IC_DestroyWeak:
+          continue;
+        default:
+          goto done;
+        }
+      }
+      Changed = true;
+      for (Value::use_iterator UI = Alloca->use_begin(),
+           UE = Alloca->use_end(); UI != UE; ) {
+        CallInst *UserInst = cast<CallInst>(*UI++);
+        switch (GetBasicInstructionClass(UserInst)) {
+        case IC_InitWeak:
+        case IC_StoreWeak:
+          // These functions return their second argument.
+          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
+          break;
+        case IC_DestroyWeak:
+          // No return value.
+          break;
+        default:
+          llvm_unreachable("alloca really is used!");
+        }
+        UserInst->eraseFromParent();
+      }
+      Alloca->eraseFromParent();
+    done:;
+    }
+  }
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
+
+}
+
+/// Identify program paths which execute sequences of retains and releases which
+/// can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+  /// Releases, Retains - These are used to store the results of the main flow
+  /// analysis. These use Value* as the key instead of Instruction* so that the
+  /// map stays valid when we get around to rewriting code and calls get
+  /// replaced by arguments.
+  DenseMap<Value *, RRInfo> Releases;
+  MapVector<Value *, RRInfo> Retains;
+
+  /// This is used during the traversal of the function to track the
+  /// states for each identified object at each block.
+  DenseMap<const BasicBlock *, BBState> BBStates;
+
+  // Analyze the CFG of the function, and all instructions.
+  bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+  // Transform.
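  // A note on the conjunction below (sketch, not part of this patch): the
  // function reports success only when code placement removed pairs *and*
  // the traversal saw nested pairs, which is what lets the caller iterate
  // to a fixed point:
  //
  // \code
  //   // In runOnFunction, below:
  //   while (OptimizeSequences(F)) {}
  // \endcode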
+  return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
+         NestingDetected;
+}
+
+/// Look for this pattern:
+/// \code
+///    %call = call i8* @something(...)
+///    %2 = call i8* @objc_retain(i8* %call)
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+/// \endcode
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+/// \code
+///    %3 = call i8* @objc_autorelease(i8* %2)
+///    ret i8* %3
+/// \endcode
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+  if (!F.getReturnType()->isPointerTy())
+    return;
+
+  SmallPtrSet<Instruction *, 4> DependingInstructions;
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock *BB = FI;
+    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+    if (!Ret) continue;
+
+    DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+
+    const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+    FindDependencies(NeedsPositiveRetainCount, Arg,
+                     BB, Ret, DependingInstructions, Visited, PA);
+    if (DependingInstructions.size() != 1)
+      goto next_block;
+
+    {
+      CallInst *Autorelease =
+        dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+      if (!Autorelease)
+        goto next_block;
+      InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
+      if (!IsAutorelease(AutoreleaseClass))
+        goto next_block;
+      if (GetObjCArg(Autorelease) != Arg)
+        goto next_block;
+
+      DependingInstructions.clear();
+      Visited.clear();
+
+      // Check that there is nothing that can affect the reference
+      // count between the autorelease and the retain.
+      FindDependencies(CanChangeRetainCount, Arg,
+                       BB, Autorelease, DependingInstructions, Visited, PA);
+      if (DependingInstructions.size() != 1)
+        goto next_block;
+
+      {
+        CallInst *Retain =
+          dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+        // Check that we found a retain with the same argument.
+        if (!Retain ||
+            !IsRetain(GetBasicInstructionClass(Retain)) ||
+            GetObjCArg(Retain) != Arg)
+          goto next_block;
+
+        DependingInstructions.clear();
+        Visited.clear();
+
+        // Convert the autorelease to an autoreleaseRV, since it's
+        // returning the value.
+        if (AutoreleaseClass == IC_Autorelease) {
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Converting autorelease "
+                          "=> autoreleaseRV since it's returning a value.\n"
+                          "                             In: " << *Autorelease
+                       << "\n");
+          Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+          DEBUG(dbgs() << "                             Out: " << *Autorelease
+                       << "\n");
+          Autorelease->setTailCall(); // Always tail call autoreleaseRV.
+          AutoreleaseClass = IC_AutoreleaseRV;
+        }
+
+        // Check that there is nothing that can affect the reference
+        // count between the retain and the call.
+        // Note that Retain need not be in BB.
+        FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain,
+                         DependingInstructions, Visited, PA);
+        if (DependingInstructions.size() != 1)
+          goto next_block;
+
+        {
+          CallInst *Call =
+            dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+          // Check that the pointer is the return value of the call.
+          if (!Call || Arg != Call)
+            goto next_block;
+
+          // Check that the call is a regular call.
+          InstructionClass Class = GetBasicInstructionClass(Call);
+          if (Class != IC_CallOrUser && Class != IC_Call)
+            goto next_block;
+
+          // If so, we can zap the retain and autorelease.
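          // An illustrative before/after for the pattern from the function
          // comment above (hypothetical IR, not from this patch):
          //
          // \code
          //   %call = call i8* @something(...)
          //   %2 = call i8* @objc_retain(i8* %call)
          //   %3 = call i8* @objc_autorelease(i8* %2)
          //   ret i8* %3
          // =>
          //   %call = call i8* @something(...)
          //   ret i8* %call
          // \endcode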
+          Changed = true;
+          ++NumRets;
+          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
+                       << "\n                             Erasing: "
+                       << *Autorelease << "\n");
+          EraseInstruction(Retain);
+          EraseInstruction(Autorelease);
+        }
+      }
+    }
+
+  next_block:
+    DependingInstructions.clear();
+    Visited.clear();
+  }
+
+  DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  Run = ModuleHasARC(M);
+  if (!Run)
+    return false;
+
+  // Identify the imprecise release metadata kind.
+  ImpreciseReleaseMDKind =
+    M.getContext().getMDKindID("clang.imprecise_release");
+  CopyOnEscapeMDKind =
+    M.getContext().getMDKindID("clang.arc.copy_on_escape");
+  NoObjCARCExceptionsMDKind =
+    M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+
+  // Intuitively, objc_retain and others are nocapture, however in practice
+  // they are not, because they return their argument value. And objc_release
+  // calls finalizers which can have arbitrary side effects.
+
+  // These are initialized lazily.
+  RetainRVCallee = 0;
+  AutoreleaseRVCallee = 0;
+  ReleaseCallee = 0;
+  RetainCallee = 0;
+  RetainBlockCallee = 0;
+  AutoreleaseCallee = 0;
+
+  return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+  if (!EnableARCOpts)
+    return false;
+
+  // If nothing in the Module uses ARC, don't do anything.
+  if (!Run)
+    return false;
+
+  Changed = false;
+
+  DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+
+  PA.setAA(&getAnalysis<AliasAnalysis>());
+
+  // This pass performs several distinct transformations. As a compile-time aid
+  // when compiling code that isn't ObjC, skip these if the relevant ObjC
+  // library functions aren't declared.
+
+  // Preliminary optimizations. This also computes UsedInThisFunction.
+  OptimizeIndividualCalls(F);
+
+  // Optimizations for weak pointers.
+  if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+                            (1 << IC_LoadWeakRetained) |
+                            (1 << IC_StoreWeak) |
+                            (1 << IC_InitWeak) |
+                            (1 << IC_CopyWeak) |
+                            (1 << IC_MoveWeak) |
+                            (1 << IC_DestroyWeak)))
+    OptimizeWeakCalls(F);
+
+  // Optimizations for retain+release pairs.
+  if (UsedInThisFunction & ((1 << IC_Retain) |
+                            (1 << IC_RetainRV) |
+                            (1 << IC_RetainBlock)))
+    if (UsedInThisFunction & (1 << IC_Release))
+      // Run OptimizeSequences until it either stops making changes or
+      // no retain+release pair nesting is detected.
+      while (OptimizeSequences(F)) {}
+
+  // Optimizations if objc_autorelease is used.
+  if (UsedInThisFunction & ((1 << IC_Autorelease) |
+                            (1 << IC_AutoreleaseRV)))
+    OptimizeReturns(F);
+
+  DEBUG(dbgs() << "\n");
+
+  return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+  PA.clear();
+}
+
+/// @}
+///
diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
new file mode 100644
index 0000000..a841c64
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp
@@ -0,0 +1,241 @@
+//===- ObjCARCUtil.cpp - ObjC ARC Optimization --------*- mode: c++ -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines several utility functions used by various ARC
+/// optimizations which are IMHO too big to be in a header file.
+///
+/// WARNING: This file knows about certain library functions.
It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#include "ObjCARC.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; +using namespace llvm::objcarc; + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, + const InstructionClass Class) { + switch (Class) { + case IC_Retain: + return OS << "IC_Retain"; + case IC_RetainRV: + return OS << "IC_RetainRV"; + case IC_RetainBlock: + return OS << "IC_RetainBlock"; + case IC_Release: + return OS << "IC_Release"; + case IC_Autorelease: + return OS << "IC_Autorelease"; + case IC_AutoreleaseRV: + return OS << "IC_AutoreleaseRV"; + case IC_AutoreleasepoolPush: + return OS << "IC_AutoreleasepoolPush"; + case IC_AutoreleasepoolPop: + return OS << "IC_AutoreleasepoolPop"; + case IC_NoopCast: + return OS << "IC_NoopCast"; + case IC_FusedRetainAutorelease: + return OS << "IC_FusedRetainAutorelease"; + case IC_FusedRetainAutoreleaseRV: + return OS << "IC_FusedRetainAutoreleaseRV"; + case IC_LoadWeakRetained: + return OS << "IC_LoadWeakRetained"; + case IC_StoreWeak: + return OS << "IC_StoreWeak"; + case IC_InitWeak: + return OS << "IC_InitWeak"; + case IC_LoadWeak: + return OS << "IC_LoadWeak"; + case IC_MoveWeak: + return OS << "IC_MoveWeak"; + case IC_CopyWeak: + return OS << "IC_CopyWeak"; + case IC_DestroyWeak: + return OS << "IC_DestroyWeak"; + case IC_StoreStrong: + return OS << "IC_StoreStrong"; + case IC_CallOrUser: + return OS << "IC_CallOrUser"; + case IC_Call: + return OS << "IC_Call"; + case IC_User: + return OS << "IC_User"; + case IC_None: + return OS << "IC_None"; + } + llvm_unreachable("Unknown instruction class!"); +} + +InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { + Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + + // No arguments. + if (AI == AE) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Default(IC_CallOrUser); + + // One argument. + const Argument *A0 = AI++; + if (AI == AE) + // Argument is a pointer. + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { + Type *ETy = PTy->getElementType(); + // Argument is i8*. 
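// An illustrative note (not part of this patch): these StringSwitch cascades
// classify the runtime entry points purely by name plus prototype shape,
// e.g.:
//
// \code
//   declare i8* @objc_retain(i8*)      ; one i8* argument  -> IC_Retain
//   declare i8* @objc_loadWeak(i8**)   ; one i8** argument -> IC_LoadWeak
// \endcode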
+ if (ETy->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_retain", IC_Retain) + .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) + .Case("objc_retainBlock", IC_RetainBlock) + .Case("objc_release", IC_Release) + .Case("objc_autorelease", IC_Autorelease) + .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) + .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) + .Case("objc_retainedObject", IC_NoopCast) + .Case("objc_unretainedObject", IC_NoopCast) + .Case("objc_unretainedPointer", IC_NoopCast) + .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) + .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) + .Default(IC_CallOrUser); + + // Argument is i8** + if (PointerType *Pte = dyn_cast<PointerType>(ETy)) + if (Pte->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_loadWeakRetained", IC_LoadWeakRetained) + .Case("objc_loadWeak", IC_LoadWeak) + .Case("objc_destroyWeak", IC_DestroyWeak) + .Default(IC_CallOrUser); + } + + // Two arguments, first is i8**. + const Argument *A1 = AI++; + if (AI == AE) + if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) + if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) + if (Pte->getElementType()->isIntegerTy(8)) + if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { + Type *ETy1 = PTy1->getElementType(); + // Second argument is i8* + if (ETy1->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_storeWeak", IC_StoreWeak) + .Case("objc_initWeak", IC_InitWeak) + .Case("objc_storeStrong", IC_StoreStrong) + .Default(IC_CallOrUser); + // Second argument is i8**. + if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) + if (Pte1->getElementType()->isIntegerTy(8)) + return StringSwitch<InstructionClass>(F->getName()) + .Case("objc_moveWeak", IC_MoveWeak) + .Case("objc_copyWeak", IC_CopyWeak) + .Default(IC_CallOrUser); + } + + // Anything else. + return IC_CallOrUser; +} + +/// \brief Determine what kind of construct V is. +InstructionClass +llvm::objcarc::GetInstructionClass(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Any instruction other than bitcast and gep with a pointer operand have a + // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer + // to a subsequent use, rather than using it themselves, in this sense. + // As a short cut, several other opcodes are known to have no pointer + // operands of interest. And ret is never followed by a release, so it's + // not interesting to examine. + switch (I->getOpcode()) { + case Instruction::Call: { + const CallInst *CI = cast<CallInst>(I); + // Check for calls to special functions. + if (const Function *F = CI->getCalledFunction()) { + InstructionClass Class = GetFunctionClass(F); + if (Class != IC_CallOrUser) + return Class; + + // None of the intrinsic functions do objc_release. For intrinsics, the + // only question is whether or not they may be users. 
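      // An illustrative note (not part of this patch): lifetime markers and
      // debug intrinsics are deliberately IC_None here, so that compiling
      // with -g cannot change what the ARC optimizer does, e.g.:
      //
      // \code
      //   declare void @llvm.lifetime.start(i64, i8* nocapture)
      // \endcode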
+ switch (F->getIntrinsicID()) { + case Intrinsic::returnaddress: case Intrinsic::frameaddress: + case Intrinsic::stacksave: case Intrinsic::stackrestore: + case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + case Intrinsic::objectsize: case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: case Intrinsic::invariant_end: + // Don't let dbg info affect our results. + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + // Short cut: Some intrinsics obviously don't use ObjC pointers. + return IC_None; + default: + break; + } + } + return GetCallSiteClass(CI); + } + case Instruction::Invoke: + return GetCallSiteClass(cast<InvokeInst>(I)); + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::Select: case Instruction::PHI: + case Instruction::Ret: case Instruction::Br: + case Instruction::Switch: case Instruction::IndirectBr: + case Instruction::Alloca: case Instruction::VAArg: + case Instruction::Add: case Instruction::FAdd: + case Instruction::Sub: case Instruction::FSub: + case Instruction::Mul: case Instruction::FMul: + case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: + case Instruction::SRem: case Instruction::URem: case Instruction::FRem: + case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::And: case Instruction::Or: case Instruction::Xor: + case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: + case Instruction::IntToPtr: case Instruction::FCmp: + case Instruction::FPTrunc: case Instruction::FPExt: + case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::UIToFP: case Instruction::SIToFP: + case Instruction::InsertElement: case Instruction::ExtractElement: + case Instruction::ShuffleVector: + case Instruction::ExtractValue: + break; + case Instruction::ICmp: + // Comparing a pointer with null, or any other constant, isn't an + // interesting use, because we don't care what the pointer points to, or + // about the values of any other dynamic reference-counted pointers. + if (IsPotentialRetainableObjPtr(I->getOperand(1))) + return IC_User; + break; + default: + // For anything else, check all the operands. + // Note that this includes both operands of a Store: while the first + // operand isn't actually being dereferenced, it is being stored to + // memory where we can no longer track who might read it and dereference + // it, so we have to consider it potentially used. + for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); + OI != OE; ++OI) + if (IsPotentialRetainableObjPtr(*OI)) + return IC_User; + } + } + + // Otherwise, it's totally inert for ARC purposes. + return IC_None; +} diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp new file mode 100644 index 0000000..ae3c628 --- /dev/null +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -0,0 +1,177 @@ +//===- ProvenanceAnalysis.cpp - ObjC ARC Optimization ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if locally distinct pointers can have
+/// the same provenance source, and thus whether they could be related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes
+/// them by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions
+/// are used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ObjCARC.h"
+#include "ProvenanceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
+                                       const Value *B) {
+  // If the values are Selects with the same condition, we can do a more
+  // precise check: just check for relations between the values on
+  // corresponding arms.
+  if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+    if (A->getCondition() == SB->getCondition())
+      return related(A->getTrueValue(), SB->getTrueValue()) ||
+             related(A->getFalseValue(), SB->getFalseValue());
+
+  // Check both arms of the Select node individually.
+  return related(A->getTrueValue(), B) ||
+         related(A->getFalseValue(), B);
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
+                                    const Value *B) {
+  // If the values are PHIs in the same block, we can do a more precise as
+  // well as efficient check: just check for relations between the values on
+  // corresponding edges.
+  if (const PHINode *PNB = dyn_cast<PHINode>(B))
+    if (PNB->getParent() == A->getParent()) {
+      for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+        if (related(A->getIncomingValue(i),
+                    PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+          return true;
+      return false;
+    }
+
+  // Check each unique source of the PHI node against B.
+  SmallPtrSet<const Value *, 4> UniqueSrc;
+  for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+    const Value *PV1 = A->getIncomingValue(i);
+    if (UniqueSrc.insert(PV1) && related(PV1, B))
+      return true;
+  }
+
+  // All of the arms checked out.
+  return false;
+}
+
+/// Test if the value of P, or any value covered by its provenance, is ever
+/// stored within the function (not counting callees).
+static bool IsStoredObjCPointer(const Value *P) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Worklist;
+  Worklist.push_back(P);
+  Visited.insert(P);
+  do {
+    P = Worklist.pop_back_val();
+    for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+         UI != UE; ++UI) {
+      const User *Ur = *UI;
+      if (isa<StoreInst>(Ur)) {
+        if (UI.getOperandNo() == 0)
+          // The pointer is stored.
+          return true;
+        // The pointer is stored through.
+        continue;
+      }
+      if (isa<CallInst>(Ur))
+        // The pointer is passed as an argument, ignore this.
+        continue;
+      if (isa<PtrToIntInst>(P))
+        // Assume the worst.
+        return true;
+      if (Visited.insert(Ur))
+        Worklist.push_back(Ur);
+    }
+  } while (!Worklist.empty());
+
+  // Everything checked out.
+  return false;
+}
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A,
+                                      const Value *B) {
+  // Skip past provenance pass-throughs.
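  // An illustrative sketch (hypothetical IR, not from this patch):
  // pass-throughs include casts and the ObjC calls that simply return their
  // argument, so %0 and %1 below both share the provenance of %p:
  //
  // \code
  //   %0 = bitcast i8* %p to i8*
  //   %1 = call i8* @objc_retain(i8* %0)
  // \endcode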
+  A = GetUnderlyingObjCPtr(A);
+  B = GetUnderlyingObjCPtr(B);
+
+  // Quick check.
+  if (A == B)
+    return true;
+
+  // Ask regular AliasAnalysis, for a first approximation.
+  switch (AA->alias(A, B)) {
+  case AliasAnalysis::NoAlias:
+    return false;
+  case AliasAnalysis::MustAlias:
+  case AliasAnalysis::PartialAlias:
+    return true;
+  case AliasAnalysis::MayAlias:
+    break;
+  }
+
+  bool AIsIdentified = IsObjCIdentifiedObject(A);
+  bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+  // An ObjC-Identified object can't alias a load if it is never locally stored.
+  if (AIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(B))
+      return IsStoredObjCPointer(A);
+    if (BIsIdentified) {
+      // Check for an obvious escape.
+      if (isa<LoadInst>(A))
+        return IsStoredObjCPointer(B);
+      // Both pointers are identified and escapes aren't an evident problem.
+      return false;
+    }
+  } else if (BIsIdentified) {
+    // Check for an obvious escape.
+    if (isa<LoadInst>(A))
+      return IsStoredObjCPointer(B);
+  }
+
+  // Special handling for PHI and Select.
+  if (const PHINode *PN = dyn_cast<PHINode>(A))
+    return relatedPHI(PN, B);
+  if (const PHINode *PN = dyn_cast<PHINode>(B))
+    return relatedPHI(PN, A);
+  if (const SelectInst *S = dyn_cast<SelectInst>(A))
+    return relatedSelect(S, B);
+  if (const SelectInst *S = dyn_cast<SelectInst>(B))
+    return relatedSelect(S, A);
+
+  // Conservative.
+  return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A,
+                                 const Value *B) {
+  // Begin by inserting a conservative value into the map. If the insertion
+  // fails, we have the answer already. If it succeeds, leave it there until
+  // we compute the real answer to guard against recursive queries.
+  if (A > B) std::swap(A, B);
+  std::pair<CachedResultsTy::iterator, bool> Pair =
+    CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  bool Result = relatedCheck(A, B);
+  CachedResults[ValuePairTy(A, B)] = Result;
+  return Result;
+}
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
new file mode 100644
index 0000000..ec449fd
--- /dev/null
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -0,0 +1,80 @@
+//===- ProvenanceAnalysis.h - ObjC ARC Optimization ---*- mode: c++ -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares a special form of Alias Analysis called ``Provenance
+/// Analysis''. The word ``provenance'' refers to the history of the ownership
+/// of an object. Thus ``Provenance Analysis'' is an analysis which attempts to
+/// use various techniques to determine if locally distinct pointers can have
+/// the same provenance source, and thus whether they could be related.
+///
+/// WARNING: This file knows about certain library functions. It recognizes
+/// them by name, and hardwires knowledge of their semantics.
+///
+/// WARNING: This file knows about how certain Objective-C library functions
+/// are used. Naive LLVM IR transformations which would otherwise be
+/// behavior-preserving may break these assumptions.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H +#define LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class Value; + class AliasAnalysis; + class PHINode; + class SelectInst; +} + +namespace llvm { +namespace objcarc { + +/// \brief This is similar to BasicAliasAnalysis, and it uses many of the same +/// techniques, except it uses special ObjC-specific reasoning about pointer +/// relationships. +/// +/// In this context ``Provenance'' is defined as the history of an object's +/// ownership. Thus ``Provenance Analysis'' is defined by using the notion of +/// an ``independent provenance source'' of a pointer to determine whether or +/// not two pointers have the same provenance source and thus could +/// potentially be related. +class ProvenanceAnalysis { + AliasAnalysis *AA; + + typedef std::pair<const Value *, const Value *> ValuePairTy; + typedef DenseMap<ValuePairTy, bool> CachedResultsTy; + CachedResultsTy CachedResults; + + bool relatedCheck(const Value *A, const Value *B); + bool relatedSelect(const SelectInst *A, const Value *B); + bool relatedPHI(const PHINode *A, const Value *B); + + void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; + ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; + +public: + ProvenanceAnalysis() {} + + void setAA(AliasAnalysis *aa) { AA = aa; } + + AliasAnalysis *getAA() const { return AA; } + + bool related(const Value *A, const Value *B); + + void clear() { + CachedResults.clear(); + } +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3fc6e3..fd55e08 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp - ObjCARC.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d513c96..d71dd5d 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -729,9 +729,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); - Attribute CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr.hasAttribute(Attribute::ZExt) || - CallerRetAttr.hasAttribute(Attribute::SExt)) + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the @@ -788,10 +788,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. - Attribute CalleeRetAttr = CS.getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr). + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerRetAttr). 
+ AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). removeAttribute(Attribute::NoAlias)) continue; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 4c3631b..995782e 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -21,6 +21,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Incoming = P->getIncomingValue(i); if (isa<Constant>(Incoming)) continue; - Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), - P->getIncomingBlock(i), - BB); - if (!C) continue; + Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB); - P->setIncomingValue(i, C); + // Look if the incoming value is a select with a constant but LVI tells us + // that the incoming value can never be that constant. In that case replace + // the incoming value with the other value of the select. This often allows + // us to remove the select later. + if (!V) { + SelectInst *SI = dyn_cast<SelectInst>(Incoming); + if (!SI) continue; + + Constant *C = dyn_cast<Constant>(SI->getFalseValue()); + if (!C) continue; + + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, + P->getIncomingBlock(i), BB) != + LazyValueInfo::False) + continue; + + DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n'); + V = SI->getTrueValue(); + } + + P->setIncomingValue(i, V); Changed = true; } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index fe3acbf..57432c7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. - const DataLayout &TD = *AA.getDataLayout(); + const DataLayout *TD = AA.getDataLayout(); - const Value *UO1 = GetUnderlyingObject(P1, &TD), - *UO2 = GetUnderlyingObject(P2, &TD); + const Value *UO1 = GetUnderlyingObject(P1, TD), + *UO2 = GetUnderlyingObject(P2, TD); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 14201b9..c04b447 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD); if (StoreBase != LoadBase) return -1; @@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! 
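  // An illustrative sketch (hypothetical IR in the typed-pointer syntax of
  // this era, not from this patch): a narrow load whose bytes lie at a known
  // constant offset inside an earlier, wider load from the same base can be
  // satisfied by reusing (widening) the earlier load:
  //
  // \code
  //   %wide = load i32* %P                ; earlier 4-byte load
  //   %P8 = bitcast i32* %P to i8*
  //   %q = getelementptr i8* %P8, i64 2
  //   %narrow = load i8* %q               ; covered by the bytes of %wide
  // \endcode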
int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); unsigned Size = MemoryDependenceAnalysis:: @@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool isSinglePred = false; bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { - isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; @@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { assert(TmpBB); LoadBB = TmpBB; - // FIXME: It is extremely unclear what this loop is doing, other than - // artificially restricting loadpre. - if (isSinglePred) { - bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; - if (AV.isSimpleValue()) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) - if (DT->dominates(LI, I)) { - isHot = true; - break; - } - } - - // We are interested only in "hot" instructions. We don't want to do any - // mis-optimizations here. - if (!isHot) - return false; - } - // Check to see how many predecessors have the loaded value fully // available. DenseMap<BasicBlock*, Value*> PredLoads; @@ -2371,8 +2347,8 @@ bool GVN::processBlock(BasicBlock *BB) { E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); - (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); + (*I)->eraseFromParent(); } InstrsToErase.clear(); @@ -2389,7 +2365,7 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. bool GVN::performPRE(Function &F) { bool Changed = false; - DenseMap<BasicBlock*, Value*> predMap; + SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { BasicBlock *CurrentBlock = *DI; @@ -2445,19 +2421,22 @@ bool GVN::performPRE(Function &F) { if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!DT->dominates(&F.getEntryBlock(), P)) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } Value* predV = findLeader(P, ValNo); if (predV == 0) { + predMap.push_back(std::make_pair(static_cast<Value *>(0), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ NumWithout = 2; + break; } else { - predMap[P] = predV; + predMap.push_back(std::make_pair(predV, P)); ++NumWith; } } @@ -2504,15 +2483,14 @@ bool GVN::performPRE(Function &F) { // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { - delete PREInstr; DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; continue; } PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); PREInstr->setDebugLoc(CurInst->getDebugLoc()); - predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2520,13 +2498,14 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
- pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - Phi->addIncoming(predMap[P], P); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); } VN.add(Phi, ValNo); @@ -2551,8 +2530,8 @@ bool GVN::performPRE(Function &F) { DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); - CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); Changed = true; } } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index dc6bef7..f94cd2a 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -440,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { } // Only these instructions are hoistable/sinkable. - bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || - isa<CmpInst>(I) || isa<InsertElementInst>(I) || - isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I)); - if (!HoistableKind) - return false; + if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && + !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && + !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && + !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && + !isa<InsertValueInst>(I)) + return false; return isSafeToExecuteUnconditionally(I); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c4f9012..8258719 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -407,7 +407,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" { - if (DefX2->getOpcode() != Instruction::And) + if (!DefX2 || DefX2->getOpcode() != Instruction::And) return false; BinaryOperator *SubOneOp; diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index c48808f..a23860a 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 0ea80f3..e98ae95 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -51,6 +52,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -59,11 +61,13 @@ 
namespace { private: LoopInfo *LI; + const TargetTransformInfo *TTI; }; } char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } /// the loop is rotated at least once. bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); + TTI = &getAnalysis<TargetTransformInfo>(); // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the @@ -278,7 +283,7 @@ bool LoopRotate::rotateLoop(Loop *L) { // duplicate blocks inside it. { CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader); + Metrics.analyzeBasicBlock(OrigHeader, *TTI); if (Metrics.notDuplicatable) { DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" << " instructions: "; L->dump()); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index c7b853e..4e4cb86 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" @@ -237,7 +238,7 @@ struct Formula { /// BaseRegs - The list of "base" registers for this use. When this is /// non-empty, - SmallVector<const SCEV *, 2> BaseRegs; + SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null /// when Scale is not zero. @@ -1087,19 +1088,19 @@ namespace { /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { - static SmallVector<const SCEV *, 2> getEmptyKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getEmptyKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-1)); return V; } - static SmallVector<const SCEV *, 2> getTombstoneKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getTombstoneKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-2)); return V; } - static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { unsigned Result = 0; for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), E = V.end(); I != E; ++I) @@ -1107,8 +1108,8 @@ struct UniquifierDenseMapInfo { return Result; } - static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, - const SmallVector<const SCEV *, 2> &RHS) { + static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, + const SmallVector<const SCEV *, 4> &RHS) { return LHS == RHS; } }; @@ -1119,7 +1120,7 @@ struct UniquifierDenseMapInfo { /// the user itself, and information about how the use may be satisfied. /// TODO: Represent multiple users of the same expression in common? 
class LSRUse { - DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; public: /// KindType - An enum for a kind of use, indicating what types of @@ -1178,7 +1179,7 @@ public: /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1188,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -2536,6 +2537,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Add this IV user to the end of the chain. IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } + IVChain &Chain = IVChainVec[ChainIdx]; SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2553,8 +2555,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); - if (!OtherUse || OtherUse == UserInst) + if (!OtherUse) continue; + // Uses in the chain will no longer be uses if the chain is formed. + // Include the head of the chain in this iteration (not Chain.begin()). + IVChain::const_iterator IncIter = Chain.Incs.begin(); + IVChain::const_iterator IncEnd = Chain.Incs.end(); + for( ; IncIter != IncEnd; ++IncIter) { + if (IncIter->UserInst == OtherUse) + break; + } + if (IncIter != IncEnd) + continue; + if (SE.isSCEVable(OtherUse->getType()) && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { @@ -2891,7 +2904,6 @@ void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.InitialMatch(S, L, SE); - F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Initial formula already exists!"); (void)Inserted; } @@ -2903,6 +2915,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.BaseRegs.push_back(S); + F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } @@ -3656,7 +3669,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. 
- typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo> BestFormulaeTy; BestFormulaeTy BestFormulae; @@ -3691,7 +3704,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { dbgs() << "\n"); } else { - SmallVector<const SCEV *, 2> Key; + SmallVector<const SCEV *, 4> Key; for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), JE = F.BaseRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; @@ -3837,83 +3850,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { /// for expressions like A, A+1, A+2, etc., allocate a single register for /// them. void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { - if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { - DEBUG(dbgs() << "The search space is too complex.\n"); + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; - DEBUG(dbgs() << "Narrowing the search space by assuming that uses " - "separated by a constant offset will use the same " - "registers.\n"); + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by assuming that uses separated " + "by a constant offset will use the same registers.\n"); - // This is especially useful for unrolled loops. + // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { - LSRUse &LU = Uses[LUIdx]; - for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), - E = LU.Formulae.end(); I != E; ++I) { - const Formula &F = *I; - if (F.BaseOffset != 0 && F.Scale == 0) { - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { - if (reconcileNewOffset(*LUThatHas, F.BaseOffset, - /*HasBaseReg=*/false, - LU.Kind, LU.AccessTy)) { - DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); - dbgs() << '\n'); - - LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.BaseOffset; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseOffset == 0 || F.Scale != 0) + continue; - // Delete formulae from the new use which are no longer legal. 
- bool Any = false; - for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { - Formula &F = LUThatHas->Formulae[i]; - if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, F)) { - DEBUG(dbgs() << " Deleting "; F.print(dbgs()); - dbgs() << '\n'); - LUThatHas->DeleteFormula(F); - --i; - --e; - Any = true; - } - } - if (Any) - LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); + if (!LUThatHas) + continue; - // Delete the old use. - DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; - break; - } + if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, + LU.Kind, LU.AccessTy)) + continue; + + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.BaseOffset; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; } + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; } - } - DEBUG(dbgs() << "After pre-selection:\n"; - print_uses(dbgs())); + // Delete formulae from the new use which are no longer legal. + bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, F)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Delete the old use. + DeleteUse(LU, LUIdx); + --LUIdx; + --NumUses; + break; + } } + + DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index e0f915b..80d060b 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" @@ -90,6 +91,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. 
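The same TargetTransformInfo wiring recurs across LoopRotation, LoopUnroll, and
LoopUnswitch in this patch. A minimal sketch of the recurring pattern, under
the pass API of this era (names taken from the hunks above and below):

    // In getAnalysisUsage: make TTI available to the pass.
    AU.addRequired<TargetTransformInfo>();

    // In runOnLoop: size estimation now goes through TTI.
    const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
    CodeMetrics Metrics;
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
         I != E; ++I)
      Metrics.analyzeBasicBlock(*I, TTI);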
@@ -101,6 +103,7 @@ namespace { char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -113,11 +116,12 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, const DataLayout *TD) { + bool &NotDuplicatable, + const TargetTransformInfo &TTI) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TD); + Metrics.analyzeBasicBlock(*I, TTI); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; @@ -134,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); + const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -181,11 +186,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (Threshold != NoThreshold) { - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); unsigned NumInlineCandidates; bool notDuplicatable; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, - notDuplicatable, TD); + notDuplicatable, TTI); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); if (notDuplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 68d4423..0e8199f 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -101,7 +102,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. - bool countLoop(const Loop* L); + bool countLoop(const Loop* L, const TargetTransformInfo &TTI); // Clean all data related to given loop. void forgetLoop(const Loop* L); @@ -170,6 +171,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } private: @@ -221,7 +223,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. 
-bool LUAnalysisCache::countLoop(const Loop* L) { +bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) { std::pair<LoopPropsMapIt, bool> InsertRes = LoopsProperties.insert(std::make_pair(L, LoopProperties())); @@ -243,7 +245,7 @@ bool LUAnalysisCache::countLoop(const Loop* L) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I, TTI); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); @@ -334,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, char LoopUnswitch::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -424,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() { // Probably we reach the quota of branches for this loop. If so // stop unswitching. - if (!BranchesInfo.countLoop(currentLoop)) + if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>())) return false; // Loop over all of the basic blocks in the loop. If we find an interior diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp deleted file mode 100644 index e6ec841..0000000 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ /dev/null @@ -1,4354 +0,0 @@ -//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines ObjC ARC optimizations. ARC stands for -// Automatic Reference Counting and is a system for managing reference counts -// for objects in Objective C. -// -// The optimizations performed include elimination of redundant, partially -// redundant, and inconsequential reference count operations, elimination of -// redundant weak pointer operations, pattern-matching and replacement of -// low-level operations into higher-level operations, and numerous minor -// simplifications. -// -// This file also defines a simple ARC-aware AliasAnalysis. -// -// WARNING: This file knows about certain library functions. It recognizes them -// by name, and hardwires knowledge of their semantics. -// -// WARNING: This file knows about how certain Objective-C library functions are -// used. Naive LLVM IR transformations which would otherwise be -// behavior-preserving may break these assumptions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "objc-arc" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -// A handy option to enable/disable all optimizations in this file. -static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true)); - -//===----------------------------------------------------------------------===// -// Misc. Utilities -//===----------------------------------------------------------------------===// - -namespace { - /// MapVector - An associative container with fast insertion-order - /// (deterministic) iteration over its elements. 
Plus the special - /// blot operation. - template<class KeyT, class ValueT> - class MapVector { - /// Map - Map keys to indices in Vector. - typedef DenseMap<KeyT, size_t> MapTy; - MapTy Map; - - /// Vector - Keys and values. - typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; - VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair<iterator, bool> - insert(const std::pair<KeyT, ValueT> &InsertPair) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// blot - This is similar to erase, but instead of removing the element - /// from the vector, it just zeros out the key in the vector. This leaves - /// iterators intact, but clients must be prepared for zeroed-out keys when - /// iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -//===----------------------------------------------------------------------===// -// ARC Utilities. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Transforms/Utils/Local.h" - -namespace { - /// InstructionClass - A simple classification for instructions. - enum InstructionClass { - IC_Retain, ///< objc_retain - IC_RetainRV, ///< objc_retainAutoreleasedReturnValue - IC_RetainBlock, ///< objc_retainBlock - IC_Release, ///< objc_release - IC_Autorelease, ///< objc_autorelease - IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue - IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush - IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop - IC_NoopCast, ///< objc_retainedObject, etc. 
- IC_FusedRetainAutorelease, ///< objc_retainAutorelease
- IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
- IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
- IC_StoreWeak, ///< objc_storeWeak (primitive)
- IC_InitWeak, ///< objc_initWeak (derived)
- IC_LoadWeak, ///< objc_loadWeak (derived)
- IC_MoveWeak, ///< objc_moveWeak (derived)
- IC_CopyWeak, ///< objc_copyWeak (derived)
- IC_DestroyWeak, ///< objc_destroyWeak (derived)
- IC_StoreStrong, ///< objc_storeStrong (derived)
- IC_CallOrUser, ///< could call objc_release and/or "use" pointers
- IC_Call, ///< could call objc_release
- IC_User, ///< could "use" a pointer
- IC_None ///< anything else
- };
-}
-
-/// IsPotentialUse - Test whether the given value is possibly a
-/// reference-counted pointer.
-static bool IsPotentialUse(const Value *Op) {
- // Pointers to static or stack storage are not reference-counted pointers.
- if (isa<Constant>(Op) || isa<AllocaInst>(Op))
- return false;
- // Special arguments are not reference-counted.
- if (const Argument *Arg = dyn_cast<Argument>(Op))
- if (Arg->hasByValAttr() ||
- Arg->hasNestAttr() ||
- Arg->hasStructRetAttr())
- return false;
- // Only consider values with pointer types.
- // It seems intuitive to exclude function pointer types as well, since
- // functions are never reference-counted; however, clang occasionally
- // bitcasts reference-counted pointers to function-pointer type
- // temporarily.
- PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty)
- return false;
- // Conservatively assume anything else is a potential use.
- return true;
-}
-
-/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
-/// of construct CS is.
-static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
- for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
- I != E; ++I)
- if (IsPotentialUse(*I))
- return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
-
- return CS.onlyReadsMemory() ? IC_None : IC_Call;
-}
-
-/// GetFunctionClass - Determine if F is one of the special known Functions.
-/// If it isn't, return IC_CallOrUser.
-static InstructionClass GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-
- // No arguments.
- if (AI == AE)
- return StringSwitch<InstructionClass>(F->getName())
- .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
- .Default(IC_CallOrUser);
-
- // One argument.
- const Argument *A0 = AI++;
- if (AI == AE)
- // Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_retain", IC_Retain) - .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) - .Case("objc_retainBlock", IC_RetainBlock) - .Case("objc_release", IC_Release) - .Case("objc_autorelease", IC_Autorelease) - .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) - .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) - .Case("objc_retainedObject", IC_NoopCast) - .Case("objc_unretainedObject", IC_NoopCast) - .Case("objc_unretainedPointer", IC_NoopCast) - .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) - .Default(IC_CallOrUser); - - // Argument is i8** - if (PointerType *Pte = dyn_cast<PointerType>(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_loadWeakRetained", IC_LoadWeakRetained) - .Case("objc_loadWeak", IC_LoadWeak) - .Case("objc_destroyWeak", IC_DestroyWeak) - .Default(IC_CallOrUser); - } - - // Two arguments, first is i8**. - const Argument *A1 = AI++; - if (AI == AE) - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) - if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) - if (Pte->getElementType()->isIntegerTy(8)) - if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { - Type *ETy1 = PTy1->getElementType(); - // Second argument is i8* - if (ETy1->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_storeWeak", IC_StoreWeak) - .Case("objc_initWeak", IC_InitWeak) - .Case("objc_storeStrong", IC_StoreStrong) - .Default(IC_CallOrUser); - // Second argument is i8**. - if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) - if (Pte1->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_moveWeak", IC_MoveWeak) - .Case("objc_copyWeak", IC_CopyWeak) - .Default(IC_CallOrUser); - } - - // Anything else. - return IC_CallOrUser; -} - -/// GetInstructionClass - Determine what kind of construct V is. -static InstructionClass GetInstructionClass(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) { - // Any instruction other than bitcast and gep with a pointer operand have a - // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer - // to a subsequent use, rather than using it themselves, in this sense. - // As a short cut, several other opcodes are known to have no pointer - // operands of interest. And ret is never followed by a release, so it's - // not interesting to examine. - switch (I->getOpcode()) { - case Instruction::Call: { - const CallInst *CI = cast<CallInst>(I); - // Check for calls to special functions. - if (const Function *F = CI->getCalledFunction()) { - InstructionClass Class = GetFunctionClass(F); - if (Class != IC_CallOrUser) - return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. 
- switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: case Intrinsic::frameaddress: - case Intrinsic::stacksave: case Intrinsic::stackrestore: - case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: - case Intrinsic::objectsize: case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. - return IC_None; - default: - break; - } - } - return GetCallSiteClass(CI); - } - case Instruction::Invoke: - return GetCallSiteClass(cast<InvokeInst>(I)); - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::Select: case Instruction::PHI: - case Instruction::Ret: case Instruction::Br: - case Instruction::Switch: case Instruction::IndirectBr: - case Instruction::Alloca: case Instruction::VAArg: - case Instruction::Add: case Instruction::FAdd: - case Instruction::Sub: case Instruction::FSub: - case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: - case Instruction::SRem: case Instruction::URem: case Instruction::FRem: - case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: - case Instruction::IntToPtr: case Instruction::FCmp: - case Instruction::FPTrunc: case Instruction::FPExt: - case Instruction::FPToUI: case Instruction::FPToSI: - case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::InsertElement: case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - break; - case Instruction::ICmp: - // Comparing a pointer with null, or any other constant, isn't an - // interesting use, because we don't care what the pointer points to, or - // about the values of any other dynamic reference-counted pointers. - if (IsPotentialUse(I->getOperand(1))) - return IC_User; - break; - default: - // For anything else, check all the operands. - // Note that this includes both operands of a Store: while the first - // operand isn't actually being dereferenced, it is being stored to - // memory where we can no longer track who might read it and dereference - // it, so we have to consider it potentially used. - for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); - OI != OE; ++OI) - if (IsPotentialUse(*OI)) - return IC_User; - } - } - - // Otherwise, it's totally inert for ARC purposes. - return IC_None; -} - -/// GetBasicInstructionClass - Determine what kind of construct V is. This is -/// similar to GetInstructionClass except that it only detects objc runtine -/// calls. This allows it to be faster. -static InstructionClass GetBasicInstructionClass(const Value *V) { - if (const CallInst *CI = dyn_cast<CallInst>(V)) { - if (const Function *F = CI->getCalledFunction()) - return GetFunctionClass(F); - // Otherwise, be conservative. - return IC_CallOrUser; - } - - // Otherwise, be conservative. 
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User; -} - -/// IsRetain - Test if the given class is objc_retain or -/// equivalent. -static bool IsRetain(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV; -} - -/// IsAutorelease - Test if the given class is objc_autorelease or -/// equivalent. -static bool IsAutorelease(InstructionClass Class) { - return Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsForwarding - Test if the given class represents instructions which return -/// their argument verbatim. -static bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || - Class == IC_NoopCast; -} - -/// IsNoopOnNull - Test if the given class represents instructions which do -/// nothing if passed a null pointer. -static bool IsNoopOnNull(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock; -} - -/// IsAlwaysTail - Test if the given class represents instructions which are -/// always safe to mark with the "tail" keyword. -static bool IsAlwaysTail(InstructionClass Class) { - // IC_RetainBlock may be given a stack argument. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsNoThrow - Test if the given class represents instructions which are always -/// safe to mark with the nounwind attribute.. -static bool IsNoThrow(InstructionClass Class) { - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_AutoreleasepoolPush || - Class == IC_AutoreleasepoolPop; -} - -/// EraseInstruction - Erase the given instruction. Many ObjC calls return their -/// argument verbatim, so if it's such a call and the return value has users, -/// replace them with the argument value. -static void EraseInstruction(Instruction *CI) { - Value *OldArg = cast<CallInst>(CI)->getArgOperand(0); - - bool Unused = CI->use_empty(); - - if (!Unused) { - // Replace the return value with the argument. - assert(IsForwarding(GetBasicInstructionClass(CI)) && - "Can't delete non-forwarding instruction with users!"); - CI->replaceAllUsesWith(OldArg); - } - - CI->eraseFromParent(); - - if (Unused) - RecursivelyDeleteTriviallyDeadInstructions(OldArg); -} - -/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which -/// also knows how to look through objc_retain and objc_autorelease calls, which -/// we know to return their argument verbatim. -static const Value *GetUnderlyingObjCPtr(const Value *V) { - for (;;) { - V = GetUnderlyingObject(V); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. 
-static const Value *StripPointerCastsAndObjCCalls(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. -static Value *StripPointerCastsAndObjCCalls(Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// GetObjCArg - Assuming the given instruction is one of the special calls such -/// as objc_retain or objc_release, return the argument value, stripped of no-op -/// casts and forwarding calls. -static Value *GetObjCArg(Value *Inst) { - return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); -} - -/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' -/// isObjCIdentifiedObject, except that it uses special knowledge of -/// ObjC conventions... -static bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa<CallInst>(V) || isa<InvokeInst>(V) || - isa<Argument>(V) || isa<Constant>(V) || - isa<AllocaInst>(V)) - return true; - - if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { - const Value *Pointer = - StripPointerCastsAndObjCCalls(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || - Name.startswith("\01L_OBJC_METH_VAR_NAME_") || - Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - } - } - - return false; -} - -/// FindSingleUseIdentifiedObject - This is similar to -/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value -/// with multiple uses. -static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { - if (Arg->hasOneUse()) { - if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) - return FindSingleUseIdentifiedObject(BC->getOperand(0)); - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) - if (GEP->hasAllZeroIndices()) - return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); - if (IsForwarding(GetBasicInstructionClass(Arg))) - return FindSingleUseIdentifiedObject( - cast<CallInst>(Arg)->getArgOperand(0)); - if (!IsObjCIdentifiedObject(Arg)) - return 0; - return Arg; - } - - // If we found an identifiable object but it has multiple uses, but they are - // trivial uses, we can still consider this to be a single-use value. 
- if (IsObjCIdentifiedObject(Arg)) { - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) { - const User *U = *UI; - if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; - } - - return Arg; - } - - return 0; -} - -/// ModuleHasARC - Test if the given module looks interesting to run ARC -/// optimization on. -static bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); -} - -/// DoesObjCBlockEscape - Test whether the given pointer, which is an -/// Objective C block pointer, does not "escape". This differs from regular -/// escape analysis in that a use as an argument to a call is not considered -/// an escape. -static bool DoesObjCBlockEscape(const Value *BlockPtr) { - // Walk the def-use chains. - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(BlockPtr); - do { - const Value *V = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const User *UUser = *UI; - // Special - Use by a call (callee or argument) is not considered - // to be an escape. - switch (GetBasicInstructionClass(UUser)) { - case IC_StoreWeak: - case IC_InitWeak: - case IC_StoreStrong: - case IC_Autorelease: - case IC_AutoreleaseRV: - // These special functions make copies of their pointer arguments. - return true; - case IC_User: - case IC_None: - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) || - isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { - Worklist.push_back(UUser); - continue; - } - // Use by a load is not an escape. - if (isa<LoadInst>(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast<StoreInst>(UUser)) - if (V != SI->getValueOperand()) - continue; - break; - default: - // Regular calls and other stuff are not considered escapes. - continue; - } - // Otherwise, conservatively assume an escape. - return true; - } - } while (!Worklist.empty()); - - // No escapes found. - return false; -} - -//===----------------------------------------------------------------------===// -// ARC AliasAnalysis. -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" - -namespace { - /// ObjCARCAliasAnalysis - This is a simple alias analysis - /// implementation that uses knowledge of ARC constructs to answer queries. - /// - /// TODO: This class could be generalized to know about other ObjC-specific - /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing - /// even though their offsets are dynamic. 
- class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - ObjCARCAliasAnalysis() : ImmutablePass(ID) { - initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - private: - virtual void initializePass() { - InitializeAliasAnalysis(this); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { - if (PI == &AliasAnalysis::ID) - return static_cast<AliasAnalysis *>(this); - return this; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); - }; -} // End of anonymous namespace - -// Register this pass... -char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasAnalysis::AliasResult -ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { - if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making a - // precise alias query. - const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); - const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); - AliasResult Result = - AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), - Location(SB, LocB.Size, LocB.TBAATag)); - if (Result != MayAlias) - return Result; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); - if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(Location(UA), Location(UB)); - // We can't use MustAlias or PartialAlias results here because - // GetUnderlyingObjCPtr may return an offsetted pointer value. - if (Result == NoAlias) - return NoAlias; - } - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return MayAlias; -} - -bool -ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, - bool OrLocal) { - if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making - // a precise alias query. 
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), - OrLocal)) - return true; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); - if (U != S) - return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return false; -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. - return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); - - switch (GetFunctionClass(F)) { - case IC_NoopCast: - return DoesNotAccessMemory; - default: - break; - } - - return AliasAnalysis::getModRefBehavior(F); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); - - switch (GetBasicInstructionClass(CS.getInstruction())) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_NoopCast: - case IC_AutoreleasepoolPush: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These functions don't access any memory visible to the compiler. - // Note that this doesn't include objc_retainBlock, because it updates - // pointers when it copies block data. - return NoModRef; - default: - break; - } - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); -} - -//===----------------------------------------------------------------------===// -// ARC expansion. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/InstIterator.h" -#include "llvm/Transforms/Scalar.h" - -namespace { - /// ObjCARCExpand - Early ARC transformations. - class ObjCARCExpand : public FunctionPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - public: - static char ID; - ObjCARCExpand() : FunctionPass(ID) { - initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCExpand::ID = 0; -INITIALIZE_PASS(ObjCARCExpand, - "objc-arc-expand", "ObjC ARC expansion", false, false) - -Pass *llvm::createObjCARCExpandPass() { - return new ObjCARCExpand(); -} - -void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -bool ObjCARCExpand::doInitialization(Module &M) { - Run = ModuleHasARC(M); - return false; -} - -bool ObjCARCExpand::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. 
- if (!Run) - return false; - - bool Changed = false; - - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - Instruction *Inst = &*I; - - DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n"); - - switch (GetBasicInstructionClass(Inst)) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: { - // These calls return their argument verbatim, as a low-level - // optimization. However, this makes high-level optimizations - // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in the contract pass. - Changed = true; - Value *Value = cast<CallInst>(Inst)->getArgOperand(0); - DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst << "\n" - " New = " << *Value << "\n"); - Inst->replaceAllUsesWith(Value); - break; - } - default: - break; - } - } - - DEBUG(dbgs() << "ObjCARCExpand: Finished List.\n\n"); - - return Changed; -} - -//===----------------------------------------------------------------------===// -// ARC autorelease pool elimination. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Constants.h" - -namespace { - /// ObjCARCAPElim - Autorelease pool elimination. - class ObjCARCAPElim : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - - static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); - static bool OptimizeBB(BasicBlock *BB); - - public: - static char ID; - ObjCARCAPElim() : ModulePass(ID) { - initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCAPElim::ID = 0; -INITIALIZE_PASS(ObjCARCAPElim, - "objc-arc-apelim", - "ObjC ARC autorelease pool elimination", - false, false) - -Pass *llvm::createObjCARCAPElimPass() { - return new ObjCARCAPElim(); -} - -void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -/// MayAutorelease - Interprocedurally determine if calls made by the -/// given call site can possibly produce autoreleases. -bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { - if (const Function *Callee = CS.getCalledFunction()) { - if (Callee->isDeclaration() || Callee->mayBeOverridden()) - return true; - for (Function::const_iterator I = Callee->begin(), E = Callee->end(); - I != E; ++I) { - const BasicBlock *BB = I; - for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); - J != F; ++J) - if (ImmutableCallSite JCS = ImmutableCallSite(J)) - // This recursion depth limit is arbitrary. It's just great - // enough to cover known interesting testcases. - if (Depth < 3 && - !JCS.onlyReadsMemory() && - MayAutorelease(JCS, Depth + 1)) - return true; - } - return false; - } - - return true; -} - -bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { - bool Changed = false; - - Instruction *Push = 0; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPush: - Push = Inst; - break; - case IC_AutoreleasepoolPop: - // If this pop matches a push and nothing in between can autorelease, - // zap the pair. 
- if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
- Changed = true;
- DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop autorelease pair:\n"
- << " Pop: " << *Inst << "\n"
- << " Push: " << *Push << "\n");
- Inst->eraseFromParent();
- Push->eraseFromParent();
- }
- Push = 0;
- break;
- case IC_CallOrUser:
- if (MayAutorelease(ImmutableCallSite(Inst)))
- Push = 0;
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
-
-bool ObjCARCAPElim::runOnModule(Module &M) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!ModuleHasARC(M))
- return false;
-
- // Find the llvm.global_ctors variable, as the first step in
- // identifying the global constructors. In theory, unnecessary autorelease
- // pools could occur anywhere, but in practice it's pretty rare. Global
- // ctors are a place where autorelease pools get inserted automatically,
- // so it's pretty common for them to be unnecessary, and it's pretty
- // profitable to eliminate them.
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (!GV)
- return false;
-
- assert(GV->hasDefinitiveInitializer() &&
- "llvm.global_ctors is uncooperative!");
-
- bool Changed = false;
-
- // Dig the constructor functions out of GV's initializer.
- ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
- for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
- // llvm.global_ctors is an array of pairs where the second members
- // are constructor functions.
- Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
- // If the user used a constructor function with the wrong signature and
- // it got bitcasted or whatever, look the other way.
- if (!F)
- continue;
- // Only look at function definitions.
- if (F->isDeclaration())
- continue;
- // Only look at functions with one basic block.
- if (llvm::next(F->begin()) != F->end())
- continue;
- // Ok, a single-block constructor function definition. Try to optimize it.
- Changed |= OptimizeBB(F->begin());
- }
-
- return Changed;
-}
-
-//===----------------------------------------------------------------------===//
-// ARC optimization.
-//===----------------------------------------------------------------------===//
-
-// TODO: On code like this:
-//
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-// stuff_that_cannot_release()
-// objc_retain(%x)
-// stuff_that_cannot_release()
-// objc_autorelease(%x)
-//
-// The second retain and autorelease can be deleted.
-
-// TODO: It should be possible to delete
-// objc_autoreleasePoolPush and objc_autoreleasePoolPop
-// pairs if nothing is actually autoreleased between them. Also, autorelease
-// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
-// after inlining) can be turned into plain release calls.
-
-// TODO: Critical-edge splitting. If the optimal insertion point is
-// a critical edge, the current algorithm has to fail, because it doesn't
-// know how to split edges. It should be possible to make the optimizer
-// think in terms of edges, rather than blocks, and then split critical
-// edges on demand.
-
-// TODO: OptimizeSequences could be generalized to be interprocedural.
-
-// TODO: Recognize that a bunch of other objc runtime calls have
-// non-escaping arguments and non-releasing arguments, and may be
-// non-autoreleasing.
-
-// TODO: Sink autorelease calls as far as possible.
Unfortunately we -// usually can't sink them past other calls, which would be the main -// case where it would be useful. - -// TODO: The pointer returned from objc_loadWeakRetained is retained. - -// TODO: Delete release+retain pairs (rare). - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/Support/CFG.h" - -STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); -STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); -STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); -STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); -STATISTIC(NumRRs, "Number of retain+release paths eliminated"); -STATISTIC(NumPeeps, "Number of calls peephole-optimized"); - -namespace { - /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it - /// uses many of the same techniques, except it uses special ObjC-specific - /// reasoning about pointer relationships. - class ProvenanceAnalysis { - AliasAnalysis *AA; - - typedef std::pair<const Value *, const Value *> ValuePairTy; - typedef DenseMap<ValuePairTy, bool> CachedResultsTy; - CachedResultsTy CachedResults; - - bool relatedCheck(const Value *A, const Value *B); - bool relatedSelect(const SelectInst *A, const Value *B); - bool relatedPHI(const PHINode *A, const Value *B); - - void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - - public: - ProvenanceAnalysis() {} - - void setAA(AliasAnalysis *aa) { AA = aa; } - - AliasAnalysis *getAA() const { return AA; } - - bool related(const Value *A, const Value *B); - - void clear() { - CachedResults.clear(); - } - }; -} - -bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { - // If the values are Selects with the same condition, we can do a more precise - // check: just check for relations between the values on corresponding arms. - if (const SelectInst *SB = dyn_cast<SelectInst>(B)) - if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); - - // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); -} - -bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { - // If the values are PHIs in the same block, we can do a more precise as well - // as efficient check: just check for relations between the values on - // corresponding edges. - if (const PHINode *PNB = dyn_cast<PHINode>(B)) - if (PNB->getParent() == A->getParent()) { - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) - if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) - return true; - return false; - } - - // Check each unique source of the PHI node against B. - SmallPtrSet<const Value *, 4> UniqueSrc; - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { - const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1) && related(PV1, B)) - return true; - } - - // All of the arms checked out. - return false; -} - -/// isStoredObjCPointer - Test if the value of P, or any value covered by its -/// provenance, is ever stored within the function (not counting callees). 
-static bool isStoredObjCPointer(const Value *P) { - SmallPtrSet<const Value *, 8> Visited; - SmallVector<const Value *, 8> Worklist; - Worklist.push_back(P); - Visited.insert(P); - do { - P = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); - UI != UE; ++UI) { - const User *Ur = *UI; - if (isa<StoreInst>(Ur)) { - if (UI.getOperandNo() == 0) - // The pointer is stored. - return true; - // The pointed is stored through. - continue; - } - if (isa<CallInst>(Ur)) - // The pointer is passed as an argument, ignore this. - continue; - if (isa<PtrToIntInst>(P)) - // Assume the worst. - return true; - if (Visited.insert(Ur)) - Worklist.push_back(Ur); - } - } while (!Worklist.empty()); - - // Everything checked out. - return false; -} - -bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { - // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); - - // Quick check. - if (A == B) - return true; - - // Ask regular AliasAnalysis, for a first approximation. - switch (AA->alias(A, B)) { - case AliasAnalysis::NoAlias: - return false; - case AliasAnalysis::MustAlias: - case AliasAnalysis::PartialAlias: - return true; - case AliasAnalysis::MayAlias: - break; - } - - bool AIsIdentified = IsObjCIdentifiedObject(A); - bool BIsIdentified = IsObjCIdentifiedObject(B); - - // An ObjC-Identified object can't alias a load if it is never locally stored. - if (AIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); - if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - // Both pointers are identified and escapes aren't an evident problem. - return false; - } - } else if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - } - - // Special handling for PHI and Select. - if (const PHINode *PN = dyn_cast<PHINode>(A)) - return relatedPHI(PN, B); - if (const PHINode *PN = dyn_cast<PHINode>(B)) - return relatedPHI(PN, A); - if (const SelectInst *S = dyn_cast<SelectInst>(A)) - return relatedSelect(S, B); - if (const SelectInst *S = dyn_cast<SelectInst>(B)) - return relatedSelect(S, A); - - // Conservative. - return true; -} - -bool ProvenanceAnalysis::related(const Value *A, const Value *B) { - // Begin by inserting a conservative value into the map. If the insertion - // fails, we have the answer already. If it succeeds, leave it there until we - // compute the real answer to guard against recursive queries. - if (A > B) std::swap(A, B); - std::pair<CachedResultsTy::iterator, bool> Pair = - CachedResults.insert(std::make_pair(ValuePairTy(A, B), true)); - if (!Pair.second) - return Pair.first->second; - - bool Result = relatedCheck(A, B); - CachedResults[ValuePairTy(A, B)] = Result; - return Result; -} - -namespace { - // Sequence - A sequence of states that a pointer may go through in which an - // objc_retain and objc_release are actually needed. - enum Sequence { - S_None, - S_Retain, ///< objc_retain(x) - S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement - S_Use, ///< any use of x - S_Stop, ///< like S_Release, but code motion is stopped - S_Release, ///< objc_release(x) - S_MovableRelease ///< objc_release(x), !clang.imprecise_release - }; -} - -static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { - // The easy cases. 
- if (A == B)
- return A;
- if (A == S_None || B == S_None)
- return S_None;
-
- if (A > B) std::swap(A, B);
- if (TopDown) {
- // Choose the side which is further along in the sequence.
- if ((A == S_Retain || A == S_CanRelease) &&
- (B == S_CanRelease || B == S_Use))
- return B;
- } else {
- // Choose the side which is further along in the sequence.
- if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
- return A;
- // If both sides are releases, choose the more conservative one.
- if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
- return A;
- if (A == S_Release && B == S_MovableRelease)
- return A;
- }
-
- return S_None;
-}
-
-namespace {
- /// RRInfo - Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// KnownSafe - After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the same known pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
- /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
- /// opposed to objc_retain calls).
- bool IsRetainBlock;
-
- /// IsTailCallRelease - True if the objc_release calls are all marked
- /// with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// ReleaseMetadata - If the Calls are objc_release calls and they all have
- /// a clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// Calls - For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// ReverseInsertPts - The set of optimal insert positions for
- /// moving calls in the opposite sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- RRInfo() :
- KnownSafe(false), IsRetainBlock(false),
- IsTailCallRelease(false),
- ReleaseMetadata(0) {}
-
- void clear();
- };
-}
-
-void RRInfo::clear() {
- KnownSafe = false;
- IsRetainBlock = false;
- IsTailCallRelease = false;
- ReleaseMetadata = 0;
- Calls.clear();
- ReverseInsertPts.clear();
-}
-
-namespace {
- /// PtrState - This class summarizes several per-pointer runtime properties
- /// which are propagated through the flow graph.
- class PtrState {
- /// KnownPositiveRefCount - True if the reference count is known to
- /// be incremented.
- bool KnownPositiveRefCount;
-
- /// Partial - True if we've seen an opportunity for partial RR elimination,
- /// such as pushing calls into a CFG triangle or into one side of a
- /// CFG diamond.
- bool Partial;
-
- /// Seq - The current position in the sequence.
- Sequence Seq : 8;
-
- public:
- /// RRI - Unidirectional information about the current sequence.
- /// TODO: Encapsulate this better.
- RRInfo RRI; - - PtrState() : KnownPositiveRefCount(false), Partial(false), - Seq(S_None) {} - - void SetKnownPositiveRefCount() { - KnownPositiveRefCount = true; - } - - void ClearRefCount() { - KnownPositiveRefCount = false; - } - - bool IsKnownIncremented() const { - return KnownPositiveRefCount; - } - - void SetSeq(Sequence NewSeq) { - Seq = NewSeq; - } - - Sequence GetSeq() const { - return Seq; - } - - void ClearSequenceProgress() { - ResetSequenceProgress(S_None); - } - - void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; - Partial = false; - RRI.clear(); - } - - void Merge(const PtrState &Other, bool TopDown); - }; -} - -void -PtrState::Merge(const PtrState &Other, bool TopDown) { - Seq = MergeSeqs(Seq, Other.Seq, TopDown); - KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - - // If we're not in a sequence (anymore), drop all associated state. - if (Seq == S_None) { - Partial = false; - RRI.clear(); - } else if (Partial || Other.Partial) { - // If we're doing a merge on a path that's previously seen a partial - // merge, conservatively drop the sequence, to avoid doing partial - // RR elimination. If the branch predicates for the two merge differ, - // mixing them is unsafe. - ClearSequenceProgress(); - } else { - // Conservatively merge the ReleaseMetadata information. - if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) - RRI.ReleaseMetadata = 0; - - RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && - Other.RRI.IsTailCallRelease; - RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); - for (SmallPtrSet<Instruction *, 2>::const_iterator - I = Other.RRI.ReverseInsertPts.begin(), - E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) - Partial |= RRI.ReverseInsertPts.insert(*I); - } -} - -namespace { - /// BBState - Per-BasicBlock state. - class BBState { - /// TopDownPathCount - The number of unique control paths from the entry - /// which can reach this block. - unsigned TopDownPathCount; - - /// BottomUpPathCount - The number of unique control paths to exits - /// from this block. - unsigned BottomUpPathCount; - - /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector<const Value *, PtrState> MapTy; - - /// PerPtrTopDown - The top-down traversal uses this to record information - /// known about a pointer at the bottom of each block. - MapTy PerPtrTopDown; - - /// PerPtrBottomUp - The bottom-up traversal uses this to record information - /// known about a pointer at the top of each block. - MapTy PerPtrBottomUp; - - /// Preds, Succs - Effective successors and predecessors of the current - /// block (this ignores ignorable edges and ignored backedges). 
- SmallVector<BasicBlock *, 2> Preds; - SmallVector<BasicBlock *, 2> Succs; - - public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} - - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; - - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { - return PerPtrTopDown.begin(); - } - ptr_const_iterator top_down_ptr_end() const { - return PerPtrTopDown.end(); - } - - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { - return PerPtrBottomUp.begin(); - } - ptr_const_iterator bottom_up_ptr_end() const { - return PerPtrBottomUp.end(); - } - - /// SetAsEntry - Mark this block as being an entry block, which has one - /// path from the entry by definition. - void SetAsEntry() { TopDownPathCount = 1; } - - /// SetAsExit - Mark this block as being an exit block, which has one - /// path to an exit by definition. - void SetAsExit() { BottomUpPathCount = 1; } - - PtrState &getPtrTopDownState(const Value *Arg) { - return PerPtrTopDown[Arg]; - } - - PtrState &getPtrBottomUpState(const Value *Arg) { - return PerPtrBottomUp[Arg]; - } - - void clearBottomUpPointers() { - PerPtrBottomUp.clear(); - } - - void clearTopDownPointers() { - PerPtrTopDown.clear(); - } - - void InitFromPred(const BBState &Other); - void InitFromSucc(const BBState &Other); - void MergePred(const BBState &Other); - void MergeSucc(const BBState &Other); - - /// GetAllPathCount - Return the number of possible unique paths from an - /// entry to an exit which pass through this block. This is only valid - /// after both the top-down and bottom-up traversals are complete. - unsigned GetAllPathCount() const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); - return TopDownPathCount * BottomUpPathCount; - } - - // Specialized CFG utilities. - typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator; - edge_iterator pred_begin() { return Preds.begin(); } - edge_iterator pred_end() { return Preds.end(); } - edge_iterator succ_begin() { return Succs.begin(); } - edge_iterator succ_end() { return Succs.end(); } - - void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } - void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } - - bool isExit() const { return Succs.empty(); } - }; -} - -void BBState::InitFromPred(const BBState &Other) { - PerPtrTopDown = Other.PerPtrTopDown; - TopDownPathCount = Other.TopDownPathCount; -} - -void BBState::InitFromSucc(const BBState &Other) { - PerPtrBottomUp = Other.PerPtrBottomUp; - BottomUpPathCount = Other.BottomUpPathCount; -} - -/// MergePred - The top-down traversal uses this to merge information about -/// predecessors to form the initial state for a new block. -void BBState::MergePred(const BBState &Other) { - // Other.TopDownPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - TopDownPathCount += Other.TopDownPathCount; - - // Check for overflow. If we have overflow, fall back to conservative behavior. - if (TopDownPathCount < Other.TopDownPathCount) { - clearTopDownPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the same key, - // merge the entries. Otherwise, copy the entry and merge it with an empty - // entry. 
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
- ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
- /*TopDown=*/true);
- }
-
- // For each entry in our set, if the other set doesn't have an entry with the
- // same key, force it to merge with an empty entry.
- for (ptr_iterator MI = top_down_ptr_begin(),
- ME = top_down_ptr_end(); MI != ME; ++MI)
- if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
- MI->second.Merge(PtrState(), /*TopDown=*/true);
-}
-
-/// MergeSucc - The bottom-up traversal uses this to merge information about
-/// successors to form the initial state for a new block.
-void BBState::MergeSucc(const BBState &Other) {
- // Other.BottomUpPathCount can be 0, in which case it is either dead or a
- // loop backedge. Loop backedges are special.
- BottomUpPathCount += Other.BottomUpPathCount;
-
- // Check for overflow. If we have overflow, fall back to conservative behavior.
- if (BottomUpPathCount < Other.BottomUpPathCount) {
- clearBottomUpPointers();
- return;
- }
-
- // For each entry in the other set, if our set has an entry with the
- // same key, merge the entries. Otherwise, copy the entry and merge
- // it with an empty entry.
- for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
- ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
- /*TopDown=*/false);
- }
-
- // For each entry in our set, if the other set doesn't have an entry
- // with the same key, force it to merge with an empty entry.
- for (ptr_iterator MI = bottom_up_ptr_begin(),
- ME = bottom_up_ptr_end(); MI != ME; ++MI)
- if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
- MI->second.Merge(PtrState(), /*TopDown=*/false);
-}
-
-namespace {
- /// ObjCARCOpt - The main ARC optimization pass.
- class ObjCARCOpt : public FunctionPass {
- bool Changed;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// RetainRVCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
- *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee;
-
- /// UsedInThisFunction - Flags which determine whether each of the
- /// interesting runtime functions is in fact used in the current function.
- unsigned UsedInThisFunction;
-
- /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
- /// metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
- /// metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// NoObjCARCExceptionsMDKind - The Metadata Kind for
- /// clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind; - - Constant *getRetainRVCallee(Module *M); - Constant *getAutoreleaseRVCallee(Module *M); - Constant *getReleaseCallee(Module *M); - Constant *getRetainCallee(Module *M); - Constant *getRetainBlockCallee(Module *M); - Constant *getAutoreleaseCallee(Module *M); - - bool IsRetainBlockOptimizable(const Instruction *Inst); - - void OptimizeRetainCall(Function &F, Instruction *Retain); - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); - void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); - void OptimizeIndividualCalls(Function &F); - - void CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const; - bool VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates); - bool VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains); - bool VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates); - bool VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases); - bool Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases); - - void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M); - - bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M); - - void OptimizeWeakCalls(Function &F); - - bool OptimizeSequences(Function &F); - - void OptimizeReturns(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - virtual void releaseMemory(); - - public: - static char ID; - ObjCARCOpt() : FunctionPass(ID) { - initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCOpt::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) -INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) -INITIALIZE_PASS_END(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) - -Pass *llvm::createObjCARCOptPass() { - return new ObjCARCOpt(); -} - -void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<ObjCARCAliasAnalysis>(); - AU.addRequired<AliasAnalysis>(); - // ARC optimization doesn't currently split critical edges. - AU.setPreservesCFG(); -} - -bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { - // Without the magic metadata tag, we have to assume this might be an - // objc_retainBlock call inserted to convert a block pointer to an id, - // in which case it really is needed. - if (!Inst->getMetadata(CopyOnEscapeMDKind)) - return false; - - // If the pointer "escapes" (not including being used in a call), - // the copy may be needed. - if (DoesObjCBlockEscape(Inst)) - return false; - - // Otherwise, it's not needed. 
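// (Illustrative note, not from the original source: a block that is only
// passed directly as a call argument does not "escape" for the purposes of
// this check, so the stack-to-heap copy performed by objc_retainBlock can be
// dropped; a block whose address is stored away may outlive the frame, so
// the copy must stay.)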
- return true; -} - -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attribute); - } - return RetainRVCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { - if (!AutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - AutoreleaseRVCallee = - M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, - Attribute); - } - return AutoreleaseRVCallee; -} - -Constant *ObjCARCOpt::getReleaseCallee(Module *M) { - if (!ReleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - ReleaseCallee = - M->getOrInsertFunction( - "objc_release", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attribute); - } - return ReleaseCallee; -} - -Constant *ObjCARCOpt::getRetainCallee(Module *M) { - if (!RetainCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - RetainCallee = - M->getOrInsertFunction( - "objc_retain", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attribute); - } - return RetainCallee; -} - -Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { - if (!RetainBlockCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - RetainBlockCallee = - M->getOrInsertFunction( - "objc_retainBlock", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttributeSet()); - } - return RetainBlockCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { - if (!AutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - AutoreleaseCallee = - M->getOrInsertFunction( - "objc_autorelease", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attribute); - } - return AutoreleaseCallee; -} - -/// IsPotentialUse - Test whether the given value is possibly a -/// reference-counted pointer, including tests which utilize AliasAnalysis. -static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialUse(Op)) - return false; - - // Objects in constant memory are not reference-counted.
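// (For example -- an illustration, not from the original source -- a pointer
// into a constant global such as a compile-time constant string lives in
// read-only memory and is never deallocated, so retain/release traffic on it
// is uninteresting here.)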
- if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// CanAlterRefCount - Test whether the given instruction can result in a -/// reference count modification (positive or negative) for the pointer's -/// object. -static bool -CanAlterRefCount(const Instruction *Inst, const Value *Ptr, - ProvenanceAnalysis &PA, InstructionClass Class) { - switch (Class) { - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_User: - // These operations never directly modify a reference count. - return false; - default: break; - } - - ImmutableCallSite CS = static_cast<const Value *>(Inst); - assert(CS && "Only calls can alter reference counts!"); - - // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); - if (AliasAnalysis::onlyReadsMemory(MRB)) - return false; - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { - const Value *Op = *I; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } - - // Assume the worst. - return true; -} - -/// CanUse - Test whether the given instruction can "use" the given pointer's -/// object in a way that requires the reference count to be positive. -static bool -CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, - InstructionClass Class) { - // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. - if (Class == IC_Call) - return false; - - // Consider various instructions which may have pointer arguments which are - // not "uses". - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { - // Comparing a pointer with null, or any other constant, isn't really a use, - // because we don't care what the pointer points to, or about the values - // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) - return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { - // For calls, just check the arguments (and not the callee operand). - for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), - OE = CS.arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // Special-case stores, because we don't care about the stored value, just - // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); - // If we can't tell what the underlying object was, assume there is a - // dependence. - return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); - } - - // Check each operand for a match. - for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); - OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; -} - -/// CanInterruptRV - Test whether the given instruction can autorelease -/// any pointer or cause an autoreleasepool pop. 
-static bool -CanInterruptRV(InstructionClass Class) { - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_CallOrUser: - case IC_Call: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - return true; - default: - return false; - } -} - -namespace { - /// DependenceKind - There are several kinds of dependence-like concepts in - /// use here. - enum DependenceKind { - NeedsPositiveRetainCount, - AutoreleasePoolBoundary, - CanChangeRetainCount, - RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. - RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. - RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. - }; -} - -/// Depends - Test if there can be dependencies on Inst through Arg. This -/// function only tests dependencies relevant for removing pairs of calls. -static bool -Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, - ProvenanceAnalysis &PA) { - // If we've reached the definition of Arg, stop. - if (Inst == Arg) - return true; - - switch (Flavor) { - case NeedsPositiveRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanUse(Inst, Arg, PA, Class); - } - } - - case AutoreleasePoolBoundary: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // These mark the end and begin of an autorelease pool scope. - return true; - default: - // Nothing else does this. - return false; - } - } - - case CanChangeRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - // Conservatively assume this can decrement any count. - return true; - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanAlterRefCount(Inst, Arg, PA, Class); - } - } - - case RetainAutoreleaseDep: - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // Don't merge an objc_autorelease with an objc_retain inside a different - // autoreleasepool scope. - return true; - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Nothing else matters for objc_retainAutorelease formation. - return false; - } - - case RetainAutoreleaseRVDep: { - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Anything that can autorelease interrupts - // retainAutoreleaseReturnValue formation. - return CanInterruptRV(Class); - } - } - - case RetainRVDep: - return CanInterruptRV(GetBasicInstructionClass(Inst)); - } - - llvm_unreachable("Invalid dependence flavor"); -} - -/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and -/// find local and non-local dependencies on Arg. -/// TODO: Cache results? 
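/// For example (an illustrative sketch, not from the original source), with
/// Flavor == NeedsPositiveRetainCount and Arg == %x, a walk starting at the
/// objc_release below stops at the intervening call:
/// \code
///   %0 = tail call i8* @objc_retain(i8* %x)
///   call void @use(i8* %x)          ; CanUse -> recorded as a dependency
///   call void @objc_release(i8* %x) ; StartInst
/// \endcode
/// leaving the call to the (hypothetical) @use in DependingInstructions.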
-static void -FindDependencies(DependenceKind Flavor, - const Value *Arg, - BasicBlock *StartBB, Instruction *StartInst, - SmallPtrSet<Instruction *, 4> &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> &Visited, - ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; - - SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist; - Worklist.push_back(std::make_pair(StartBB, StartPos)); - do { - std::pair<BasicBlock *, BasicBlock::iterator> Pair = - Worklist.pop_back_val(); - BasicBlock *LocalStartBB = Pair.first; - BasicBlock::iterator LocalStartPos = Pair.second; - BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); - for (;;) { - if (LocalStartPos == StartBBBegin) { - pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); - if (PI == PE) - // If we've reached the function entry, produce a null dependence. - DependingInstructions.insert(0); - else - // Add the predecessors to the worklist. - do { - BasicBlock *PredBB = *PI; - if (Visited.insert(PredBB)) - Worklist.push_back(std::make_pair(PredBB, PredBB->end())); - } while (++PI != PE); - break; - } - - Instruction *Inst = --LocalStartPos; - if (Depends(Flavor, Inst, Arg, PA)) { - DependingInstructions.insert(Inst); - break; - } - } - } while (!Worklist.empty()); - - // Determine whether the original StartBB post-dominates all of the blocks we - // visited. If not, insert a sentinel indicating that most optimizations are - // not safe. - for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(), - E = Visited.end(); I != E; ++I) { - const BasicBlock *BB = *I; - if (BB == StartBB) - continue; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - const BasicBlock *Succ = *SI; - if (Succ != StartBB && !Visited.count(Succ)) { - DependingInstructions.insert(reinterpret_cast<Instruction *>(-1)); - return; - } - } - } -} - -static bool isNullOrUndef(const Value *V) { - return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); -} - -static bool isNoopInstruction(const Instruction *I) { - return isa<BitCastInst>(I) || - (isa<GetElementPtrInst>(I) && - cast<GetElementPtrInst>(I)->hasAllZeroIndices()); -} - -/// OptimizeRetainCall - Turn objc_retain into -/// objc_retainAutoreleasedReturnValue if the operand is a return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it into an objc_retainAutoreleasedReturnValue. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming " - "objc_retain => " - "objc_retainAutoreleasedReturnValue since the operand is a " - "return value.\n" - " Old: " - << *Retain << "\n"); - - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *Retain << "\n"); -} - -/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be paired -/// with an objc_autoreleaseReturnValue, delete the pair and return true.
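/// For instance (an illustrative sketch, not from the original source), in
/// \code
///   %0 = call i8* @objc_autoreleaseReturnValue(i8* %p)
///   %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
/// \endcode
/// the two calls cancel out and both are erased, while a retainRV whose
/// operand does not come from an immediately preceding call or invoke is
/// downgraded to a plain objc_retain.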
-bool -ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call or invoke. - const Value *Arg = GetObjCArg(RetainRV); - ImmutableCallSite CS(Arg); - if (const Instruction *Call = CS.getInstruction()) { - if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock *RetainRVParent = RetainRV->getParent(); - if (II->getNormalDest() == RetainRVParent) { - BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } - } - } - - // Check for being preceded by an objc_autoreleaseReturnValue on the same - // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); - if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); - if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && - GetObjCArg(I) == Arg) { - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n" - << " Erasing " << *RetainRV - << "\n"); - - EraseInstruction(I); - EraseInstruction(RetainRV); - return true; - } - } - - // Turn it into a plain objc_retain. - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming " - "objc_retainAutoreleasedReturnValue => " - "objc_retain since the operand is not a return value.\n" - " Old: " - << *RetainRV << "\n"); - - cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *RetainRV << "\n"); - - return false; -} - -/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into -/// objc_autorelease if the result is not used as a return value. -void -ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { - // Check for a return of the pointer value. - const Value *Ptr = GetObjCArg(AutoreleaseRV); - SmallVector<const Value *, 2> Users; - Users.push_back(Ptr); - do { - Ptr = Users.pop_back_val(); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - if (isa<BitCastInst>(I)) - Users.push_back(I); - } - } while (!Users.empty()); - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming " - "objc_autoreleaseReturnValue => " - "objc_autorelease since its operand is not used as a return " - "value.\n" - " Old: " - << *AutoreleaseRV << "\n"); - - cast<CallInst>(AutoreleaseRV)-> - setCalledFunction(getAutoreleaseCallee(F.getParent())); - - DEBUG(dbgs() << " New: " - << *AutoreleaseRV << "\n"); - -} - -/// OptimizeIndividualCalls - Visit each call, one at a time, and make -/// simplifications without doing any additional analysis. -void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { - // Reset all the flags in preparation for recomputing them. - UsedInThisFunction = 0; - - // Visit all objc_* calls in F.
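// As an illustration of one of the peepholes below (an IR sketch, not from
// the original source), passing a null pointer-to-weak to a weak entry point
// is undefined behavior, so
// \code
//   %v = call i8* @objc_loadWeak(i8** null)
// \endcode
// is rewritten into a store of undef through the null pointer, with all uses
// of %v replaced by undef.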
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: " << - *Inst << "\n"); - - InstructionClass Class = GetBasicInstructionClass(Inst); - - switch (Class) { - default: break; - - // Delete no-op casts. These function calls have special semantics, but - // the semantics are entirely implemented via lowering in the front-end, - // so by the time they reach the optimizer, they are just no-op calls - // which return their argument. - // - // There are gray areas here, as the ability to cast reference-counted - // pointers to raw void* and back allows code to break ARC assumptions, - // however these are currently considered to be unimportant. - case IC_NoopCast: - Changed = true; - ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:" - " " << *Inst << "\n"); - EraseInstruction(Inst); - continue; - - // If the pointer-to-weak-pointer is null, it's undefined behavior. - case IC_StoreWeak: - case IC_LoadWeak: - case IC_LoadWeakRetained: - case IC_InitWeak: - case IC_DestroyWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); - CI->replaceAllUsesWith(NewValue); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_CopyWeak: - case IC_MoveWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - - llvm::Value *NewValue = UndefValue::get(CI->getType()); - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null " - "pointer-to-weak-pointer is undefined behavior.\n" - " Old = " << *CI << - "\n New = " << - *NewValue << "\n"); - - CI->replaceAllUsesWith(NewValue); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_Retain: - OptimizeRetainCall(F, Inst); - break; - case IC_RetainRV: - if (OptimizeRetainRVCall(F, Inst)) - continue; - break; - case IC_AutoreleaseRV: - OptimizeAutoreleaseRVCall(F, Inst); - break; - } - - // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. - if (IsAutorelease(Class) && Inst->use_empty()) { - CallInst *Call = cast<CallInst>(Inst); - const Value *Arg = Call->getArgOperand(0); - Arg = FindSingleUseIdentifiedObject(Arg); - if (Arg) { - Changed = true; - ++NumAutoreleases; - - // Create the declaration lazily. 
- LLVMContext &C = Inst->getContext(); - CallInst *NewCall = - CallInst::Create(getReleaseCallee(F.getParent()), - Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing " - "objc_autorelease(x) with objc_release(x) since x is " - "otherwise unused.\n" - " Old: " << *Call << - "\n New: " << - *NewCall << "\n"); - - EraseInstruction(Call); - Inst = NewCall; - Class = IC_Release; - } - } - - // For functions which can never be passed stack arguments, add - // a tail keyword. - if (IsAlwaysTail(Class)) { - Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword" - " to function since it can never be passed stack args: " << *Inst << - "\n"); - cast<CallInst>(Inst)->setTailCall(); - } - - // Set nounwind as needed. - if (IsNoThrow(Class)) { - Changed = true; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw" - " class. Setting nounwind on: " << *Inst << "\n"); - cast<CallInst>(Inst)->setDoesNotThrow(); - } - - if (!IsNoopOnNull(Class)) { - UsedInThisFunction |= 1 << Class; - continue; - } - - const Value *Arg = GetObjCArg(Inst); - - // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { - Changed = true; - ++NumNoops; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " - "null are no-ops. Erasing: " << *Inst << "\n"); - EraseInstruction(Inst); - continue; - } - - // Keep track of which of retain, release, autorelease, and retain_block - // are actually present in this function. - UsedInThisFunction |= 1 << Class; - - // If Arg is a PHI, and one or more incoming values to the - // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. - SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; - Worklist.push_back(std::make_pair(Inst, Arg)); - do { - std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); - Inst = Pair.first; - Arg = Pair.second; - - const PHINode *PN = dyn_cast<PHINode>(Arg); - if (!PN) continue; - - // Determine if the PHI has any null operands, or any incoming - // critical edges. - bool HasNull = false; - bool HasCriticalEdges = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) - HasNull = true; - else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) - .getNumSuccessors() != 1) { - HasCriticalEdges = true; - break; - } - } - // If we have null operands and no critical edges, optimize. - if (!HasCriticalEdges && HasNull) { - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - - // Check that there is nothing that cares about the reference - // count between the call and the phi. - switch (Class) { - case IC_Retain: - case IC_RetainBlock: - // These can always be moved up. - break; - case IC_Release: - // These can't be moved across things that care about the retain - // count. - FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_Autorelease: - // These can't be moved across autorelease pool scope boundaries.
- FindDependencies(AutoreleasePoolBoundary, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_RetainRV: - case IC_AutoreleaseRV: - // Don't move these; the RV optimization depends on the autoreleaseRV - // being tail called, and the retainRV being immediately after a call - // (which might still happen if we get lucky with codegen layout, but - // it's not worth taking the chance). - continue; - default: - llvm_unreachable("Invalid dependence flavor"); - } - - if (DependingInstructions.size() == 1 && - *DependingInstructions.begin() == PN) { - Changed = true; - ++NumPartialNoops; - // Clone the call into each predecessor that has a non-null value. - CallInst *CInst = cast<CallInst>(Inst); - Type *ParamTy = CInst->getArgOperand(0)->getType(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { - CallInst *Clone = cast<CallInst>(CInst->clone()); - Value *Op = PN->getIncomingValue(i); - Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); - if (Op->getType() != ParamTy) - Op = new BitCastInst(Op, ParamTy, "", InsertPos); - Clone->setArgOperand(0, Op); - Clone->insertBefore(InsertPos); - Worklist.push_back(std::make_pair(Clone, Incoming)); - } - } - // Erase the original call. - EraseInstruction(CInst); - continue; - } - } - } while (!Worklist.empty()); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished Queue.\n\n"); - - } -} - -/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible -/// control flow, or other CFG structures where moving code across the edge -/// would result in it being executed more. -void -ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const { - // If any top-down local-use or possible-dec has a succ which is earlier in - // the sequence, forget it. - for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. 
- DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - } -} - -bool -ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - - // If we see two releases in a row on the same pointer, make a note; - // we'll circle back to revisit it after we've hopefully eliminated the - // second release, which may allow us to eliminate the first release too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) - NestingDetected = true; - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); - S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - S.RRI.Calls.insert(Inst); - - S.SetKnownPositiveRefCount(); - break; - } - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - return NestingDetected; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - assert(S.RRI.ReverseInsertPts.empty()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - assert(S.RRI.ReverseInsertPts.empty()); - // As above; handle invoke specially. 
- if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each successor to compute the initial state - // for the current block. - BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); - if (SI != SE) { - const BasicBlock *Succ = *SI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.InitFromSucc(I->second); - ++SI; - for (; SI != SE; ++SI) { - Succ = *SI; - I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.MergeSucc(I->second); - } - } - - // Visit all the instructions, bottom-up. - for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { - Instruction *Inst = llvm::prior(I); - - // Invoke instructions are visited as part of their successors (below). - if (isa<InvokeInst>(Inst)) - continue; - - NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); - } - - // If there's a predecessor with an invoke, visit the invoke as if it were - // part of this block, since we can't insert code after an invoke in its own - // block, and we don't want to split critical edges. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { - BasicBlock *Pred = *PI; - if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) - NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - // If we see two retains in a row on the same pointer, make a note; - // we'll circle back to revisit it after we've hopefully eliminated the - // second retain, which may allow us to eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case.
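// An IR sketch of the nesting being detected (illustrative, not from the
// original source):
// \code
//   %0 = call i8* @objc_retain(i8* %x)
//   %1 = call i8* @objc_retain(i8* %x)  ; second retain while already S_Retain
//   ...
//   call void @objc_release(i8* %x)
//   call void @objc_release(i8* %x)
// \endcode
// Eliminating the inner pair on one iteration may allow a later iteration to
// eliminate the outer pair as well.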
- if (S.GetSeq() == S_Retain) - NestingDetected = true; - - S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.Calls.insert(Inst); - } - - S.SetKnownPositiveRefCount(); - - // A retain can be a potential use; proceed to the generic checking - // code below. - break; - } - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); - - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and from S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each predecessor to compute the initial state - // for the current block. - BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); - if (PI != PE) { - const BasicBlock *Pred = *PI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.InitFromPred(I->second); - ++PI; - for (; PI != PE; ++PI) { - Pred = *PI; - I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.MergePred(I->second); - } - } - - // Visit all the instructions, top-down.
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); - } - - CheckForCFGHazards(BB, BBStates, MyStates); - return NestingDetected; -} - -static void -ComputePostOrders(Function &F, - SmallVectorImpl<BasicBlock *> &PostOrder, - SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, - unsigned NoObjCARCExceptionsMDKind, - DenseMap<const BasicBlock *, BBState> &BBStates) { - /// Visited - The visited set, for doing DFS walks. - SmallPtrSet<BasicBlock *, 16> Visited; - - // Do DFS, computing the PostOrder. - SmallPtrSet<BasicBlock *, 16> OnStack; - SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; - - // Functions always have exactly one entry block, and we don't have - // any other block that we treat like an entry block. - BasicBlock *EntryBB = &F.getEntryBlock(); - BBState &MyStates = BBStates[EntryBB]; - MyStates.SetAsEntry(); - TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back()); - SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); - Visited.insert(EntryBB); - OnStack.insert(EntryBB); - do { - dfs_next_succ: - BasicBlock *CurrBB = SuccStack.back().first; - TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back()); - succ_iterator SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - while (SuccStack.back().second != SE) { - BasicBlock *SuccBB = *SuccStack.back().second++; - if (Visited.insert(SuccBB)) { - TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back()); - SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); - BBStates[CurrBB].addSucc(SuccBB); - BBState &SuccStates = BBStates[SuccBB]; - SuccStates.addPred(CurrBB); - OnStack.insert(SuccBB); - goto dfs_next_succ; - } - - if (!OnStack.count(SuccBB)) { - BBStates[CurrBB].addSucc(SuccBB); - BBStates[SuccBB].addPred(CurrBB); - } - } - OnStack.erase(CurrBB); - PostOrder.push_back(CurrBB); - SuccStack.pop_back(); - } while (!SuccStack.empty()); - - Visited.clear(); - - // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. - // Functions may have many exits, and there are also blocks which we treat - // as exits due to ignored edges. - SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *ExitBB = I; - BBState &MyStates = BBStates[ExitBB]; - if (!MyStates.isExit()) - continue; - - MyStates.SetAsExit(); - - PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); - Visited.insert(ExitBB); - while (!PredStack.empty()) { - reverse_dfs_next_succ: - BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); - while (PredStack.back().second != PE) { - BasicBlock *BB = *PredStack.back().second++; - if (Visited.insert(BB)) { - PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); - goto reverse_dfs_next_succ; - } - } - ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); - } - } -} - -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - - // Use reverse-postorder traversals, because we magically know that loops - // will be well behaved, i.e.
they won't repeatedly call retain on a single - // pointer without doing a release. We can't use the ReversePostOrderTraversal - // class here because we want the reverse-CFG postorder to consider each - // function exit point, and we want to ignore selected cycle edges. - SmallVector<BasicBlock *, 16> PostOrder; - SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; - ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, - NoObjCARCExceptionsMDKind, - BBStates); - - // Use reverse-postorder on the reverse CFG for bottom-up. - bool BottomUpNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); - I != E; ++I) - BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - - // Use reverse-postorder for top-down. - bool TopDownNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - PostOrder.rbegin(), E = PostOrder.rend(); - I != E; ++I) - TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); - - return TopDownNestingDetected && BottomUpNestingDetected; -} - -/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M) { - Type *ArgTy = Arg->getType(); - Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); - - // Insert the new retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = ReleasesToMove.ReverseInsertPts.begin(), - PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? - getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); - Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = RetainsToMove.ReverseInsertPts.begin(), - PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, - "", InsertPt); - // Attach a clang.imprecise_release metadata tag, if appropriate. - if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); - Call->setDoesNotThrow(); - if (ReleasesToMove.IsTailCallRelease) - Call->setTailCall(); - } - - // Delete the original retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = RetainsToMove.Calls.begin(), - AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRetain = *AI; - Retains.blot(OrigRetain); - DeadInsts.push_back(OrigRetain); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = ReleasesToMove.Calls.begin(), - AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRelease = *AI; - Releases.erase(OrigRelease); - DeadInsts.push_back(OrigRelease); - } -} - -/// PerformCodePlacement - Identify pairings between the retains and releases, -/// and delete and/or move them. 
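/// The pairing is iterative because one retain can reach several releases and
/// vice versa. As a sketch (not from the original source), in
/// \code
///   %0 = call i8* @objc_retain(i8* %x)
///   br i1 %c, label %left, label %right
/// left:
///   call void @objc_release(i8* %x)
///   br label %done
/// right:
///   call void @objc_release(i8* %x)
///   br label %done
/// \endcode
/// the single retain is connected to both releases, and the set is eliminated
/// only if the path counts on the retain and release sides balance (here,
/// two paths on each side).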
-bool -ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M) { - bool AnyPairsCompletelyEliminated = false; - RRInfo RetainsToMove; - RRInfo ReleasesToMove; - SmallVector<Instruction *, 4> NewRetains; - SmallVector<Instruction *, 4> NewReleases; - SmallVector<Instruction *, 8> DeadInsts; - - // Visit each retain. - for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { - Value *V = I->first; - if (!V) continue; // blotted - - Instruction *Retain = cast<Instruction>(V); - Value *Arg = GetObjCArg(Retain); - - // If the object being released is in static or stack storage, we know it's - // not being managed by ObjC reference counting, so we can delete pairs - // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); - - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) - if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>( - StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) - if (GV->isConstant()) - KnownSafe = true; - - // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. - bool KnownSafeTD = true, KnownSafeBU = true; - - // Connect the dots between the top-down-collected RetainsToMove and - // bottom-up-collected ReleasesToMove to form sets of related calls. - // This is an iterative process so that we connect multiple releases - // to multiple retains if needed. - unsigned OldDelta = 0; - unsigned NewDelta = 0; - unsigned OldCount = 0; - unsigned NewCount = 0; - bool FirstRelease = true; - bool FirstRetain = true; - NewRetains.push_back(Retain); - for (;;) { - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { - Instruction *NewRetain = *NI; - MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); - assert(It != Retains.end()); - const RRInfo &NewRetainRRI = It->second; - KnownSafeTD &= NewRetainRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewRetainRRI.Calls.begin(), - LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewRetainRelease = *LI; - DenseMap<Value *, RRInfo>::const_iterator Jt = - Releases.find(NewRetainRelease); - if (Jt == Releases.end()) - goto next_retain; - const RRInfo &NewRetainReleaseRRI = Jt->second; - assert(NewRetainReleaseRRI.Calls.count(NewRetain)); - if (ReleasesToMove.Calls.insert(NewRetainRelease)) { - OldDelta -= - BBStates[NewRetainRelease->getParent()].GetAllPathCount(); - - // Merge the ReleaseMetadata and IsTailCallRelease values. - if (FirstRelease) { - ReleasesToMove.ReleaseMetadata = - NewRetainReleaseRRI.ReleaseMetadata; - ReleasesToMove.IsTailCallRelease = - NewRetainReleaseRRI.IsTailCallRelease; - FirstRelease = false; - } else { - if (ReleasesToMove.ReleaseMetadata != - NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; - if (ReleasesToMove.IsTailCallRelease != - NewRetainReleaseRRI.IsTailCallRelease) - ReleasesToMove.IsTailCallRelease = false; - } - - // Collect the optimal insertion points. 
- if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), - RE = NewRetainReleaseRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (ReleasesToMove.ReverseInsertPts.insert(RIP)) - NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); - } - NewReleases.push_back(NewRetainRelease); - } - } - } - NewRetains.clear(); - if (NewReleases.empty()) break; - - // Back the other way. - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { - Instruction *NewRelease = *NI; - DenseMap<Value *, RRInfo>::const_iterator It = - Releases.find(NewRelease); - assert(It != Releases.end()); - const RRInfo &NewReleaseRRI = It->second; - KnownSafeBU &= NewReleaseRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewReleaseRRI.Calls.begin(), - LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewReleaseRetain = *LI; - MapVector<Value *, RRInfo>::const_iterator Jt = - Retains.find(NewReleaseRetain); - if (Jt == Retains.end()) - goto next_retain; - const RRInfo &NewReleaseRetainRRI = Jt->second; - assert(NewReleaseRetainRRI.Calls.count(NewRelease)); - if (RetainsToMove.Calls.insert(NewReleaseRetain)) { - unsigned PathCount = - BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); - OldDelta += PathCount; - OldCount += PathCount; - - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (RetainsToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - goto next_retain; - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), - RE = NewReleaseRetainRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (RetainsToMove.ReverseInsertPts.insert(RIP)) { - PathCount = BBStates[RIP->getParent()].GetAllPathCount(); - NewDelta += PathCount; - NewCount += PathCount; - } - } - NewRetains.push_back(NewReleaseRetain); - } - } - } - NewReleases.clear(); - if (NewRetains.empty()) break; - } - - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { - RetainsToMove.ReverseInsertPts.clear(); - ReleasesToMove.ReverseInsertPts.clear(); - NewCount = 0; - } else { - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - goto next_retain; - } - - // Determine whether the original call points are balanced in the retain and - // release calls through the program. If not, conservatively don't touch - // them. - // TODO: It's theoretically possible to do code motion in this case, as - // long as the existing imbalances are maintained. - if (OldDelta != 0) - goto next_retain; - - // Ok, everything checks out and we're all set. Let's move some code!
- Changed = true; - assert(OldCount != 0 && "Unreachable code?"); - AnyPairsCompletelyEliminated = NewCount == 0; - NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, - Retains, Releases, DeadInsts, M); - - next_retain: - NewReleases.clear(); - NewRetains.clear(); - RetainsToMove.clear(); - ReleasesToMove.clear(); - } - - // Now that we're done moving everything, we can delete the newly dead - // instructions, as we no longer need them as insert points. - while (!DeadInsts.empty()) - EraseInstruction(DeadInsts.pop_back_val()); - - return AnyPairsCompletelyEliminated; -} - -/// OptimizeWeakCalls - Weak pointer optimizations. -void ObjCARCOpt::OptimizeWeakCalls(Function &F) { - // First, do memdep-style RLE and S2L optimizations. We can't use memdep - // itself because it uses AliasAnalysis and we need to do provenance - // queries instead. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst << - "\n"); - - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) - continue; - - // Delete objc_loadWeak calls with no users. - if (Class == IC_LoadWeak && Inst->use_empty()) { - Inst->eraseFromParent(); - continue; - } - - // TODO: For now, just look for an earlier available version of this value - // within the same block. Theoretically, we could do memdep-style non-local - // analysis too, but that would want caching. A better approach would be to - // use the technique that EarlyCSE uses. - inst_iterator Current = llvm::prior(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); - for (BasicBlock::iterator B = CurrentBB->begin(), - J = Current.getInstructionIterator(); - J != B; --J) { - Instruction *EarlierInst = &*llvm::prior(J); - InstructionClass EarlierClass = GetInstructionClass(EarlierInst); - switch (EarlierClass) { - case IC_LoadWeak: - case IC_LoadWeakRetained: { - // If this is loading from the same pointer, replace this load's value - // with that one. - CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_StoreWeak: - case IC_InitWeak: { - // If this is storing to the same pointer and has the same size etc. - // replace this load's value with the stored value. - CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. 
- if (Class == IC_LoadWeakRetained) {
- CallInst *CI =
- CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
- "", Call);
- CI->setTailCall();
- }
- // Zap the fully redundant load.
- Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
- Call->eraseFromParent();
- goto clobbered;
- case AliasAnalysis::MayAlias:
- case AliasAnalysis::PartialAlias:
- goto clobbered;
- case AliasAnalysis::NoAlias:
- break;
- }
- break;
- }
- case IC_MoveWeak:
- case IC_CopyWeak:
- // TODO: Grab the copied value.
- goto clobbered;
- case IC_AutoreleasepoolPush:
- case IC_None:
- case IC_User:
- // Weak pointers are only modified through the weak entry points
- // (and arbitrary calls, which could call the weak entry points).
- break;
- default:
- // Anything else could modify the weak pointer.
- goto clobbered;
- }
- }
- clobbered:;
- }
-
- // Then, for each destroyWeak with an alloca operand, check to see if
- // the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- InstructionClass Class = GetBasicInstructionClass(Inst);
- if (Class != IC_DestroyWeak)
- continue;
-
- CallInst *Call = cast<CallInst>(Inst);
- Value *Arg = Call->getArgOperand(0);
- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ++UI) {
- const Instruction *UserInst = cast<Instruction>(*UI);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- case IC_DestroyWeak:
- continue;
- default:
- goto done;
- }
- }
- Changed = true;
- for (Value::use_iterator UI = Alloca->use_begin(),
- UE = Alloca->use_end(); UI != UE; ) {
- CallInst *UserInst = cast<CallInst>(*UI++);
- switch (GetBasicInstructionClass(UserInst)) {
- case IC_InitWeak:
- case IC_StoreWeak:
- // These functions return their second argument.
- UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
- break;
- case IC_DestroyWeak:
- // No return value.
- break;
- default:
- llvm_unreachable("alloca really is used!");
- }
- UserInst->eraseFromParent();
- }
- Alloca->eraseFromParent();
- done:;
- }
- }
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
-}
-
-/// OptimizeSequences - Identify program paths which execute sequences of
-/// retains and releases which can be eliminated.
-bool ObjCARCOpt::OptimizeSequences(Function &F) {
- /// Releases, Retains - These are used to store the results of the main flow
- /// analysis. These use Value* as the key instead of Instruction* so that the
- /// map stays valid when we get around to rewriting code and calls get
- /// replaced by arguments.
- DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
-
- /// BBStates - This is used during the traversal of the function to track the
- /// states for each identified object at each block.
- DenseMap<const BasicBlock *, BBState> BBStates;
-
- // Analyze the CFG of the function, and all instructions.
- bool NestingDetected = Visit(F, BBStates, Retains, Releases);
-
- // Transform.
- return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) &&
- NestingDetected;
-}
-
-/// OptimizeReturns - Look for this pattern:
-/// \code
-/// %call = call i8* @something(...)
-/// %2 = call i8* @objc_retain(i8* %call)
-/// %3 = call i8* @objc_autorelease(i8* %2)
-/// ret i8* %3
-/// \endcode
-/// And delete the retain and autorelease.
-/// -/// Otherwise if it's just this: -/// \code -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// convert the autorelease to autoreleaseRV. -void ObjCARCOpt::OptimizeReturns(Function &F) { - if (!F.getReturnType()->isPointerTy()) - return; - - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - BasicBlock *BB = FI; - ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n"); - - if (!Ret) continue; - - const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - FindDependencies(NeedsPositiveRetainCount, Arg, - BB, Ret, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - if (!Autorelease) - goto next_block; - InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); - if (!IsAutorelease(AutoreleaseClass)) - goto next_block; - if (GetObjCArg(Autorelease) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference - // count between the autorelease and the retain. - FindDependencies(CanChangeRetainCount, Arg, - BB, Autorelease, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that we found a retain with the same argument. - if (!Retain || - !IsRetain(GetBasicInstructionClass(Retain)) || - GetObjCArg(Retain) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Convert the autorelease to an autoreleaseRV, since it's - // returning the value. - if (AutoreleaseClass == IC_Autorelease) { - Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); - AutoreleaseClass = IC_AutoreleaseRV; - } - - // Check that there is nothing that can affect the reference - // count between the retain and the call. - // Note that Retain need not be in BB. - FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, - DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Call = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that the pointer is the return value of the call. - if (!Call || Arg != Call) - goto next_block; - - // Check that the call is a regular call. - InstructionClass Class = GetBasicInstructionClass(Call); - if (Class != IC_CallOrUser && Class != IC_Call) - goto next_block; - - // If so, we can zap the retain and autorelease. - Changed = true; - ++NumRets; - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain - << "\n Erasing: " - << *Autorelease << "\n"); - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } - - next_block: - DependingInstructions.clear(); - Visited.clear(); - } - - DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n"); - -} - -bool ObjCARCOpt::doInitialization(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // Identify the imprecise release metadata kind. 
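The initialization that follows caches metadata kind IDs once per module, so later per-instruction queries are integer comparisons rather than string lookups. A minimal sketch of the pattern, with an illustrative helper name and the string-keyed API of this period:

  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Illustrative only: test whether a call carries the imprecise-release
  // marker that clang attaches under ARC.
  static bool hasImpreciseRelease(Module &M, Instruction &Call) {
    unsigned KindID = M.getContext().getMDKindID("clang.imprecise_release");
    return Call.getMetadata(KindID) != 0; // Cheap integer-keyed lookup.
  }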
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-
- // Intuitively, objc_retain and others are nocapture, however in practice
- // they are not, because they return their argument value. And objc_release
- // calls finalizers which can have arbitrary side effects.
-
- // These are initialized lazily.
- RetainRVCallee = 0;
- AutoreleaseRVCallee = 0;
- ReleaseCallee = 0;
- RetainCallee = 0;
- RetainBlockCallee = 0;
- AutoreleaseCallee = 0;
-
- return false;
-}
-
-bool ObjCARCOpt::runOnFunction(Function &F) {
- if (!EnableARCOpts)
- return false;
-
- // If nothing in the Module uses ARC, don't do anything.
- if (!Run)
- return false;
-
- Changed = false;
-
- PA.setAA(&getAnalysis<AliasAnalysis>());
-
- // This pass performs several distinct transformations. As a compile-time aid
- // when compiling code that isn't ObjC, skip these if the relevant ObjC
- // library functions aren't declared.
-
- // Preliminary optimizations. This also computes UsedInThisFunction.
- OptimizeIndividualCalls(F);
-
- // Optimizations for weak pointers.
- if (UsedInThisFunction & ((1 << IC_LoadWeak) |
- (1 << IC_LoadWeakRetained) |
- (1 << IC_StoreWeak) |
- (1 << IC_InitWeak) |
- (1 << IC_CopyWeak) |
- (1 << IC_MoveWeak) |
- (1 << IC_DestroyWeak)))
- OptimizeWeakCalls(F);
-
- // Optimizations for retain+release pairs.
- if (UsedInThisFunction & ((1 << IC_Retain) |
- (1 << IC_RetainRV) |
- (1 << IC_RetainBlock)))
- if (UsedInThisFunction & (1 << IC_Release))
- // Run OptimizeSequences until it either stops making changes or
- // no retain+release pair nesting is detected.
- while (OptimizeSequences(F)) {}
-
- // Optimizations if objc_autorelease is used.
- if (UsedInThisFunction & ((1 << IC_Autorelease) |
- (1 << IC_AutoreleaseRV)))
- OptimizeReturns(F);
-
- return Changed;
-}
-
-void ObjCARCOpt::releaseMemory() {
- PA.clear();
-}
-
-//===----------------------------------------------------------------------===//
-// ARC contraction.
-//===----------------------------------------------------------------------===//
-
-// TODO: ObjCARCContract could insert PHI nodes when uses aren't
-// dominated by single calls.
-
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Operator.h"
-
-STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
-
-namespace {
- /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
- /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
- class ObjCARCContract : public FunctionPass {
- bool Changed;
- AliasAnalysis *AA;
- DominatorTree *DT;
- ProvenanceAnalysis PA;
-
- /// Run - A flag indicating whether this optimization pass should run.
- bool Run;
-
- /// StoreStrongCallee, etc. - Declarations for ObjC runtime
- /// functions, for use in creating calls to them. These are initialized
- /// lazily to avoid cluttering up the Module with unused declarations.
- Constant *StoreStrongCallee,
- *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
-
- /// RetainRVMarker - The inline asm string to insert between calls and
- /// RetainRV calls to make the optimization work on targets which need it.
- const MDString *RetainRVMarker;
-
- /// StoreStrongCalls - The set of inserted objc_storeStrong calls.
If - /// at the end of walking the function we have found no alloca - /// instructions, these calls can be marked "tail". - SmallPtrSet<CallInst *, 8> StoreStrongCalls; - - Constant *getStoreStrongCallee(Module *M); - Constant *getRetainAutoreleaseCallee(Module *M); - Constant *getRetainAutoreleaseRVCallee(Module *M); - - bool ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited); - - void ContractRelease(Instruction *Release, - inst_iterator &Iter); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - public: - static char ID; - ObjCARCContract() : FunctionPass(ID) { - initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCContract::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_END(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) - -Pass *llvm::createObjCARCContractPass() { - return new ObjCARCContract(); -} - -void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); - AU.addRequired<DominatorTree>(); - AU.setPreservesCFG(); -} - -Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { - if (!StoreStrongCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = { I8XX, I8X }; - - AttributeSet Attribute = AttributeSet() - .addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)) - .addAttr(M->getContext(), 1, Attribute::get(C, Attribute::NoCapture)); - - StoreStrongCallee = - M->getOrInsertFunction( - "objc_storeStrong", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attribute); - } - return StoreStrongCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { - if (!RetainAutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - RetainAutoreleaseCallee = - M->getOrInsertFunction("objc_retainAutorelease", FTy, Attribute); - } - return RetainAutoreleaseCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { - if (!RetainAutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attribute = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attribute::get(C, Attribute::NoUnwind)); - RetainAutoreleaseRVCallee = - M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, - Attribute); - } - return RetainAutoreleaseRVCallee; -} - -/// ContractAutorelease - Merge an autorelease with a retain into a fused call. 
-bool -ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited) { - const Value *Arg = GetObjCArg(Autorelease); - - // Check that there are no instructions between the retain and the autorelease - // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; - if (Class == IC_AutoreleaseRV) - FindDependencies(RetainAutoreleaseRVDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - else - FindDependencies(RetainAutoreleaseDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - - Visited.clear(); - if (DependingInstructions.size() != 1) { - DependingInstructions.clear(); - return false; - } - - Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - DependingInstructions.clear(); - - if (!Retain || - GetBasicInstructionClass(Retain) != IC_Retain || - GetObjCArg(Retain) != Arg) - return false; - - Changed = true; - ++NumPeeps; - - DEBUG(dbgs() << "ObjCARCContract::ContractAutorelease: Fusing " - "retain/autorelease. Erasing: " << *Autorelease << "\n" - " Old Retain: " - << *Retain << "\n"); - - if (Class == IC_AutoreleaseRV) - Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); - else - Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); - - DEBUG(dbgs() << " New Retain: " - << *Retain << "\n"); - - EraseInstruction(Autorelease); - return true; -} - -/// ContractRelease - Attempt to merge an objc_release with a store, load, and -/// objc_retain to form an objc_storeStrong. This can be a little tricky because -/// the instructions don't always appear in order, and there may be unrelated -/// intervening instructions. -void ObjCARCContract::ContractRelease(Instruction *Release, - inst_iterator &Iter) { - LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); - if (!Load || !Load->isSimple()) return; - - // For now, require everything to be in one basic block. - BasicBlock *BB = Release->getParent(); - if (Load->getParent() != BB) return; - - // Walk down to find the store and the release, which may be in either order. - BasicBlock::iterator I = Load, End = BB->end(); - ++I; - AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; - bool SawRelease = false; - for (; !Store || !SawRelease; ++I) { - if (I == End) - return; - - Instruction *Inst = I; - if (Inst == Release) { - SawRelease = true; - continue; - } - - InstructionClass Class = GetBasicInstructionClass(Inst); - - // Unrelated retains are harmless. - if (IsRetain(Class)) - continue; - - if (Store) { - // The store is the point where we're going to put the objc_storeStrong, - // so make sure there are no uses after it. - if (CanUse(Inst, Load, PA, Class)) - return; - } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { - // We are moving the load down to the store, so check for anything - // else which writes to the memory between the load and the store. - Store = dyn_cast<StoreInst>(Inst); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; - } - } - - Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); - - // Walk up to find the retain. 
- I = Store; - BasicBlock::iterator Begin = BB->begin(); - while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) - --I; - Instruction *Retain = I; - if (GetBasicInstructionClass(Retain) != IC_Retain) return; - if (GetObjCArg(Retain) != New) return; - - Changed = true; - ++NumStoreStrongs; - - LLVMContext &C = Release->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - - Value *Args[] = { Load->getPointerOperand(), New }; - if (Args[0]->getType() != I8XX) - Args[0] = new BitCastInst(Args[0], I8XX, "", Store); - if (Args[1]->getType() != I8X) - Args[1] = new BitCastInst(Args[1], I8X, "", Store); - CallInst *StoreStrong = - CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, "", Store); - StoreStrong->setDoesNotThrow(); - StoreStrong->setDebugLoc(Store->getDebugLoc()); - - // We can't set the tail flag yet, because we haven't yet determined - // whether there are any escaping allocas. Remember this call, so that - // we can set the tail flag once we know it's safe. - StoreStrongCalls.insert(StoreStrong); - - if (&*Iter == Store) ++Iter; - Store->eraseFromParent(); - Release->eraseFromParent(); - EraseInstruction(Retain); - if (Load->use_empty()) - Load->eraseFromParent(); -} - -bool ObjCARCContract::doInitialization(Module &M) { - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // These are initialized lazily. - StoreStrongCallee = 0; - RetainAutoreleaseCallee = 0; - RetainAutoreleaseRVCallee = 0; - - // Initialize RetainRVMarker. - RetainRVMarker = 0; - if (NamedMDNode *NMD = - M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) - if (NMD->getNumOperands() == 1) { - const MDNode *N = NMD->getOperand(0); - if (N->getNumOperands() == 1) - if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) - RetainRVMarker = S; - } - - return false; -} - -bool ObjCARCContract::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - AA = &getAnalysis<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - - PA.setAA(&getAnalysis<AliasAnalysis>()); - - // Track whether it's ok to mark objc_storeStrong calls with the "tail" - // keyword. Be conservative if the function has variadic arguments. - // It seems that functions which "return twice" are also unsafe for the - // "tail" argument, because they are setjmp, which could need to - // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && - !F.callsFunctionThatReturnsTwice(); - - // For ObjC library calls which return their argument, replace uses of the - // argument with uses of the call return value, if it dominates the use. This - // reduces register pressure. - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); - - // Only these library routines return their argument. In particular, - // objc_retainBlock does not necessarily return its argument. 
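ContractRelease, completed just above, is a peephole over four instructions: the load of the old value, the retain of the new value, the store, and the release of the old value all collapse into one runtime call. A sketch of the emission step, assuming StoreStrongFn is the objc_storeStrong declaration with signature void(i8**, i8*):

  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Illustrative only. Conceptually replaces (bitcasts elided):
  //   %old = load i8** %addr
  //   %new1 = call i8* @objc_retain(i8* %new)
  //   store i8* %new1, i8** %addr
  //   call void @objc_release(i8* %old)
  // with a single call to the fused runtime entry point.
  static CallInst *emitStoreStrong(IRBuilder<> &B, Value *StoreStrongFn,
                                   Value *Addr, Value *New) {
    Value *Args[] = { Addr, New };
    CallInst *CI = B.CreateCall(StoreStrongFn, Args);
    CI->setDoesNotThrow(); // objc_storeStrong is nounwind.
    return CI;
  }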
- InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - break; - case IC_Autorelease: - case IC_AutoreleaseRV: - if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) - continue; - break; - case IC_RetainRV: { - // If we're compiling for a target which needs a special inline-asm - // marker to do the retainAutoreleasedReturnValue optimization, - // insert it now. - if (!RetainRVMarker) - break; - BasicBlock::iterator BBI = Inst; - BasicBlock *InstParent = Inst->getParent(); - - // Step up to see if the call immediately precedes the RetainRV call. - // If it's an invoke, we have to cross a block boundary. And we have - // to carefully dodge no-op instructions. - do { - if (&*BBI == InstParent->begin()) { - BasicBlock *Pred = InstParent->getSinglePredecessor(); - if (!Pred) - goto decline_rv_optimization; - BBI = Pred->getTerminator(); - break; - } - --BBI; - } while (isNoopInstruction(BBI)); - - if (&*BBI == GetObjCArg(Inst)) { - DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " - "retainAutoreleasedReturnValue optimization.\n"); - Changed = true; - InlineAsm *IA = - InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), - /*isVarArg=*/false), - RetainRVMarker->getString(), - /*Constraints=*/"", /*hasSideEffects=*/true); - CallInst::Create(IA, "", Inst); - } - decline_rv_optimization: - break; - } - case IC_InitWeak: { - // objc_initWeak(p, null) => *p = null - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { - Value *Null = - ConstantPointerNull::get(cast<PointerType>(CI->getType())); - Changed = true; - new StoreInst(Null, CI->getArgOperand(0), CI); - - DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" - << " New = " << *Null << "\n"); - - CI->replaceAllUsesWith(Null); - CI->eraseFromParent(); - } - continue; - } - case IC_Release: - ContractRelease(Inst, I); - continue; - case IC_User: - // Be conservative if the function has any alloca instructions. - // Technically we only care about escaping alloca instructions, - // but this is sufficient to handle some interesting cases. - if (isa<AllocaInst>(Inst)) - TailOkForStoreStrongs = false; - continue; - default: - continue; - } - - DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); - - // Don't use GetObjCArg because we don't want to look through bitcasts - // and such; to do the replacement, the argument must have type i8*. - const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); - for (;;) { - // If we're compiling bugpointed code, don't get in trouble. - if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) - break; - // Look through the uses of the pointer. - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ) { - Use &U = UI.getUse(); - unsigned OperandNo = UI.getOperandNo(); - ++UI; // Increment UI now, because we may unlink its element. - - // If the call's return value dominates a use of the call's argument - // value, rewrite the use to use the return value. We check for - // reachability here because an unreachable call is considered to - // trivially dominate itself, which would lead us to rewriting its - // argument in terms of its return value, which would lead to - // infinite loops in GetObjCArg. 
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - // While we're here, rewrite all edges for this PHI, rather - // than just one use at a time, to minimize the number of - // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - cast<Instruction>(U.getUser())); - U.set(Replacement); - } - } - } - - // If Arg is a no-op casted pointer, strip one level of casts and iterate. - if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) - Arg = BI->getOperand(0); - else if (isa<GEPOperator>(Arg) && - cast<GEPOperator>(Arg)->hasAllZeroIndices()) - Arg = cast<GEPOperator>(Arg)->getPointerOperand(); - else if (isa<GlobalAlias>(Arg) && - !cast<GlobalAlias>(Arg)->mayBeOverridden()) - Arg = cast<GlobalAlias>(Arg)->getAliasee(); - else - break; - } - } - - // If this function has no escaping allocas or suspicious vararg usage, - // objc_storeStrong calls can be marked with the "tail" keyword. - if (TailOkForStoreStrongs) - for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), - E = StoreStrongCalls.end(); I != E; ++I) - (*I)->setTailCall(); - StoreStrongCalls.clear(); - - return Changed; -} diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 3e935d8..e30a274 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -271,13 +271,6 @@ public: return I->second; } - /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { - DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = - StructValueState.find(std::make_pair(V, i)); - assert(I != StructValueState.end() && "V is not in valuemap!"); - return I->second; - }*/ - /// getTrackedRetVals - Get the inferred return value map. /// const DenseMap<Function*, LatticeVal> &getTrackedRetVals() { @@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) { markConstant(&PN, OperandVal); // Acquire operand value } - - - void SCCPSolver::visitReturnInst(ReturnInst &I) { if (I.getNumOperands() == 0) return; // ret void @@ -1185,7 +1175,7 @@ void SCCPSolver::Solve() { DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from - // bottom to constant + // bottom to constant, or to overdefined. 
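The corrected comment above reflects SCCP's lattice discipline: a value starts at bottom (undefined), may rise to a single known constant, and tops out at overdefined, never moving back down; that monotonicity is what bounds how often anything re-enters the worklists. A minimal sketch of the merge rule, with illustrative types rather than SCCP's actual LatticeVal:

  enum LatticeKind { Bottom, Constant, Overdefined };

  struct Lattice {
    LatticeKind Kind;
    int Val; // Meaningful only when Kind == Constant.
  };

  // Merge two facts about the same value; the result only ever climbs.
  static Lattice join(Lattice A, Lattice B) {
    if (A.Kind == Bottom) return B;
    if (B.Kind == Bottom) return A;
    if (A.Kind == Constant && B.Kind == Constant && A.Val == B.Val)
      return A;                       // Agreeing constants stay constant.
    Lattice Top = { Overdefined, 0 }; // Any disagreement is overdefined.
    return Top;
  }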
// // Anything on this worklist that is overdefined need not be visited // since all of its users will have already been marked as overdefined diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4204171..e90fe90 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -43,14 +43,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -411,9 +409,9 @@ static Value *foldSelectInst(SelectInst &SI) { // early on. if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { + if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - } + return 0; } @@ -621,7 +619,7 @@ private: } // Disable SRoA for any intrinsics except for lifetime invariants. - // FIXME: What about debug instrinsics? This matches old behavior, but + // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. void visitIntrinsicInst(IntrinsicInst &II) { if (!IsOffsetKnown) @@ -1141,8 +1139,7 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { - for (const_use_iterator UI = use_begin(I), UE = use_end(I); - UI != UE; ++UI) { + for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { if (!UI->U) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " @@ -1170,8 +1167,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const { } OS << "Partitioning of alloca: " << AI << "\n"; - unsigned Num = 0; - for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { print(OS, I); printUsers(OS, I); } @@ -1242,7 +1238,7 @@ public: for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = 0; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1277,7 +1273,7 @@ namespace { /// 1) It takes allocations of aggregates and analyzes the ways in which they /// are used to try to split them into smaller allocations, ideally of /// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate invidual scalar accesses. +/// as necessary and try to isolate individual scalar accesses. /// 2) It will transform accesses into forms which are suitable for SSA value /// promotion. This can be replacing a memset with a scalar store of an /// integer value, or it can involve speculating operations on a PHI or @@ -1439,8 +1435,7 @@ private: // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. 
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; - ++Idx) { + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); Value *InVal = PN.getIncomingValue(Idx); @@ -1483,7 +1478,7 @@ private: PN.getName() + ".sroa.speculated"); // Get the TBAA tag and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. + // matter which one we get and if any differ. LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); unsigned Align = SomeLoad->getAlignment(); @@ -1816,7 +1811,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// The strategy for finding the more natural GEPs is to peel off layers of the /// pointer, walking back through bit casts and GEPs, searching for a base /// pointer from which we can compute a natural GEP with the desired -/// properities. The algorithm tries to fold as many constant indices into +/// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, @@ -2062,9 +2057,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, uint64_t Size = TD.getTypeStoreSize(AllocaTy); - // Check the uses to ensure the uses are (likely) promoteable integer uses. + // Check the uses to ensure the uses are (likely) promotable integer uses. // Also ensure that the alloca has a covering load or store. We don't want - // to widen the integer operotains only to fail to promote due to some other + // to widen the integer operations only to fail to promote due to some other // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { @@ -2283,7 +2278,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // If we are rewriting an alloca partition which can be written as pure // vector operations, we stash extra information here. When VecTy is - // non-null, we have some strict guarantees about the rewriten alloca: + // non-null, we have some strict guarantees about the rewritten alloca: // - The new alloca is exactly the size of the vector type here. // - The accesses all either map to the entire vector or to a single // element. @@ -2636,7 +2631,7 @@ private: /// /// Note that this routine assumes an i8 is a byte. If that isn't true, don't /// call this routine. - /// FIXME: Heed the abvice above. + /// FIXME: Heed the advice above. /// /// \param V The i8 value to splat. /// \param Size The number of bytes in the output (assuming i8 is one byte) @@ -2971,6 +2966,7 @@ private: else New = IRB.CreateLifetimeEnd(Ptr, Size); + (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return true; } @@ -3147,9 +3143,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. 
- Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices, - Name + ".gep"), - Name + ".load"); + Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3422,7 +3417,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to - // performe phi and select speculation. + // perform phi and select speculation. AllocaInst *NewAI; if (AllocaTy == AI.getAllocatedType()) { assert(PI->BeginOffset == 0 && @@ -3589,7 +3584,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { /// If there is a domtree available, we attempt to promote using the full power /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is /// based on the SSAUpdater utilities. This function returns whether any -/// promotion occured. +/// promotion occurred. bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 35d2fa0..8a9c7da 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); - initializeObjCARCAliasAnalysisPass(Registry); - initializeObjCARCAPElimPass(Registry); - initializeObjCARCExpandPass(Registry); - initializeObjCARCContractPass(Registry); - initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d5cefa3..916b37d 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -165,7 +165,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { // Ignore non-calls. CallInst *CI = dyn_cast<CallInst>(I++); - if (!CI) continue; + if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue; // Ignore indirect calls and calls to non-external functions. 
Function *Callee = CI->getCalledFunction(); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6572e09..2002e68 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -58,6 +58,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { + const TargetTransformInfo *TTI; + static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { initializeTailCallElimPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); private: @@ -109,14 +114,21 @@ namespace { } char TailCallElim::ID = 0; -INITIALIZE_PASS(TailCallElim, "tailcallelim", - "Tail Call Elimination", false, false) +INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } +void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); +} + /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) { // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; + TTI = &getAnalysis<TargetTransformInfo>(); BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; @@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, if (BB == &F->getEntryBlock() && FirstNonDbg(BB->front()) == CI && FirstNonDbg(llvm::next(BB->begin())) == TI && - callIsSmall(CI)) { + CI->getCalledFunction() && + !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. CallSite::arg_iterator I = CallSite(CI).arg_begin(), diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 8330e84..ba99d2e 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -37,12 +37,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { // Can delete self loop. BB->getSinglePredecessor() == BB) && "Block is not dead!"); TerminatorInst *BBTerm = BB->getTerminator(); - + // Loop through all of our successors and make sure they know that one // of their predecessors is going away. for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) BBTerm->getSuccessor(i)->removePredecessor(BB); - + // Zap all the instructions in the block. while (!BB->empty()) { Instruction &I = BB->back(); @@ -55,7 +55,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { I.replaceAllUsesWith(UndefValue::get(I.getType())); BB->getInstList().pop_back(); } - + // Zap the block! 
BB->eraseFromParent(); } @@ -66,25 +66,25 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// when the block has exactly one predecessor. void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa<PHINode>(BB->begin())) return; - + AliasAnalysis *AA = 0; MemoryDependenceAnalysis *MemDep = 0; if (P) { AA = P->getAnalysisIfAvailable<AliasAnalysis>(); MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); } - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); else PN->replaceAllUsesWith(UndefValue::get(PN->getType())); - + if (MemDep) MemDep->removeInstruction(PN); // Memdep updates AA itself. else if (AA && isa<PointerType>(PN->getType())) AA->deleteValue(PN); - + PN->eraseFromParent(); } } @@ -115,7 +115,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; - + // Can't merge if there are multiple predecessors, or no predecessors. BasicBlock *PredBB = BB->getUniquePredecessor(); if (!PredBB) return false; @@ -124,7 +124,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { if (PredBB == BB) return false; // Don't break invokes. if (isa<InvokeInst>(PredBB->getTerminator())) return false; - + succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) @@ -132,7 +132,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { OnlySucc = 0; // There are multiple distinct successors! break; } - + // Can't merge if there are multiple successors. if (!OnlySucc) return false; @@ -149,21 +149,21 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { // Begin by getting rid of unneeded PHIs. if (isa<PHINode>(BB->front())) FoldSingleEntryPHINodes(BB, P); - + // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); - + // Make all PHI nodes that referred to BB now refer to Pred as their // source... BB->replaceAllUsesWith(PredBB); - + // Move all definitions in the successor to the predecessor... PredBB->getInstList().splice(PredBB->end(), BB->getInstList()); - + // Inherit predecessors name if it exists. if (!PredBB->hasName()) PredBB->takeName(BB); - + // Finally, erase the old block and update dominator info. if (P) { if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { @@ -176,16 +176,16 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { DT->eraseNode(BB); } - + if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) LI->removeBlock(BB); - + if (MemoryDependenceAnalysis *MD = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>()) MD->invalidateCachedPredecessors(); } } - + BB->eraseFromParent(); return true; } @@ -251,11 +251,11 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { } } -/// SplitEdge - Split the edge connecting specified block. Pass P must -/// not be NULL. +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { unsigned SuccNum = GetSuccessorNumber(BB, Succ); - + // If this is a critical edge, let SplitCriticalEdge do it. 
TerminatorInst *LatchTerm = BB->getTerminator();
 if (SplitCriticalEdge(LatchTerm, SuccNum, P))
@@ -271,11 +271,11 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
 SP = NULL;
 return SplitBlock(Succ, Succ->begin(), P);
 }
-
+
 // Otherwise, if BB has a single successor, split it at the bottom of the
 // block.
 assert(BB->getTerminator()->getNumSuccessors() == 1 &&
- "Should have a single succ!");
+ "Should have a single succ!");
 return SplitBlock(BB, BB->getTerminator(), P);
}
@@ -301,12 +301,12 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
 if (DomTreeNode *OldNode = DT->getNode(Old)) {
 std::vector<DomTreeNode *> Children;
 for (DomTreeNode::iterator I = OldNode->begin(), E = OldNode->end();
- I != E; ++I)
+ I != E; ++I)
 Children.push_back(*I);
 DomTreeNode *NewNode = DT->addNewBlock(New,Old);
 for (std::vector<DomTreeNode *>::iterator I = Children.begin(),
- E = Children.end(); I != E; ++I)
+ E = Children.end(); I != E; ++I)
 DT->changeImmediateDominator(*I, NewNode);
 }
 }
@@ -424,7 +424,7 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
 PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(),
 PN->getName() + ".ph", BI);
 if (AA) AA->copyValue(PN, NewPHI);
-
+
 // Move all of the PHI values for 'Preds' to the new PHI.
 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
 Value *V = PN->removeIncomingValue(Preds[i], false);
@@ -451,16 +451,16 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
 /// preserve LoopSimplify (because it's complicated to handle the case where one
 /// of the edges being split is an exit of a loop with other exits).
 ///
-BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
+BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 ArrayRef<BasicBlock*> Preds,
 const char *Suffix, Pass *P) {
 // Create new basic block, insert right before the original block.
 BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
 BB->getParent(), BB);
-
+
 // The new block unconditionally branches to the old block.
 BranchInst *BI = BranchInst::Create(BB, NewBB);
-
+
 // Move the edges from Preds to point to NewBB instead of BB.
 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
 // This is slightly more strict than necessary; the minimum requirement
@@ -497,13 +497,13 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
 /// block gets the remaining predecessors of OrigBB. The landingpad instruction
 /// OrigBB is cloned into both of the new basic blocks. The new blocks are given
 /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
-///
+///
 /// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
 /// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
 /// it does not preserve LoopSimplify (because it's complicated to handle the
 /// case where one of the edges being split is an exit of a loop with other
 /// exits).
-///
+
 void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
 ArrayRef<BasicBlock*> Preds,
 const char *Suffix1, const char *Suffix2,
@@ -608,11 +608,11 @@ void llvm::FindFunctionBackedges(const Function &F,
 const BasicBlock *BB = &F.getEntryBlock();
 if (succ_begin(BB) == succ_end(BB))
 return;
-
+
 SmallPtrSet<const BasicBlock*, 8> Visited;
 SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
 SmallPtrSet<const BasicBlock*, 8> InStack;
-
+
 Visited.insert(BB);
 VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
 InStack.insert(BB);
@@ -620,7 +620,7 @@ void llvm::FindFunctionBackedges(const Function &F,
 std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
 const BasicBlock *ParentBB = Top.first;
 succ_const_iterator &I = Top.second;
-
+
 bool FoundNew = false;
 while (I != succ_end(ParentBB)) {
 BB = *I++;
@@ -632,7 +632,7 @@ void llvm::FindFunctionBackedges(const Function &F,
 if (InStack.count(BB))
 Result.push_back(std::make_pair(ParentBB, BB));
 }
-
+
 if (FoundNew) {
 // Go down one level if there is an unvisited successor.
 InStack.insert(BB);
@@ -641,7 +641,7 @@ void llvm::FindFunctionBackedges(const Function &F,
 // Go up one level.
 InStack.erase(VisitStack.pop_back_val().first);
 }
- } while (!VisitStack.empty());
+ } while (!VisitStack.empty());
}
/// FoldReturnIntoUncondBranch - This method duplicates the specified return
@@ -655,7 +655,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
 // Clone the return and add it to the end of the predecessor.
 Instruction *NewRet = RI->clone();
 Pred->getInstList().push_back(NewRet);
-
+
 // If the return instruction returns a value, and if the value was a
 // PHI node in "BB", propagate the right value into the return.
 for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
@@ -679,7 +679,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
 }
 }
 }
-
+
 // Update any PHI nodes in the returning block to realize that we no
 // longer branch to them.
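The BuildLibCalls.cpp hunks just below all apply one mechanical migration: each local array of AttributeWithIndex entries becomes an array of AttributeSet values, and the per-index constructors move to AttributeSet::get. A sketch of the post-migration construction pattern, mirroring the strlen case (illustrative helper name, same signatures as used in the hunks):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  static AttributeSet strlenAttrs(LLVMContext &Ctx) {
    AttributeSet AS[2];
    // Parameter 1 (the string pointer) does not capture its argument.
    AS[0] = AttributeSet::get(Ctx, 1, Attribute::NoCapture);
    // The function itself is readonly and nounwind.
    Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
    AS[1] = AttributeSet::get(Ctx, AttributeSet::FunctionIndex,
                              ArrayRef<Attribute::AttrKind>(AVs, 2));
    // Fold the per-index sets into one set for the declaration.
    return AttributeSet::get(Ctx, AS);
  }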
BB->removePredecessor(Pred); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index bf540b0..6d13217 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -38,16 +38,16 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrLen = M->getOrInsertFunction("strlen", AttributeSet::get(M->getContext(), - AWI), + AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -67,16 +67,16 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Constant *StrNLen = M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), - AWI), + AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -98,15 +98,15 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AttributeWithIndex AWI = - AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AttributeSet AS = + AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); Constant *StrChr = M->getOrInsertFunction("strchr", AttributeSet::get(M->getContext(), - AWI), + AS), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -123,17 +123,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AS[2] = AttributeSet::get(M->getContext(), 
AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *StrNCmp = M->getOrInsertFunction("strncmp", AttributeSet::get(M->getContext(), - AWI), + AS), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -156,13 +156,13 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrCpy = M->getOrInsertFunction(Name, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -180,14 +180,14 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Value *StrNCpy = M->getOrInsertFunction(Name, AttributeSet::get(M->getContext(), - AWI), + AS), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -207,12 +207,12 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS; + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -235,13 +235,13 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI; + AttributeSet AS; Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemChr = M->getOrInsertFunction("memchr", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -263,16 +263,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, 
Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - ArrayRef<Attribute::AttrKind>(AVs, 2)); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCmp = M->getOrInsertFunction("memcmp", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -344,13 +344,13 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -368,14 +368,14 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[2]; + AS[0] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction("fputc", - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -401,16 +401,16 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 2, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FPutsName, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -434,17 +434,17 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = 
AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, - Attribute::NoUnwind); + AttributeSet AS[3]; + AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); + AS[1] = AttributeSet::get(M->getContext(), 4, Attribute::NoCapture); + AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) F = M->getOrInsertFunction(FWriteName, - AttributeSet::get(M->getContext(), AWI), + AttributeSet::get(M->getContext(), AS), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index ccc3eae..a309bce 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -95,18 +95,16 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) if (Argument* Anew = dyn_cast<Argument>(VMap[I])) - Anew->addAttr( OldFunc->getAttributes() + Anew->addAttr(OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(NewFunc->getContext(), - AttributeSet::ReturnIndex, - OldFunc->getAttributes() - .getRetAttributes())); + .addAttributes(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldFunc->getAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(NewFunc->getContext(), - AttributeSet::FunctionIndex, - OldFunc->getAttributes() - .getFnAttributes())); + .addAttributes(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldFunc->getAttributes())); } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 3a21528..f7c659f 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index d5c41f5..db525cd 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -78,12 +79,21 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, InsertPt = &I; ++InsertPt; } else { - // We cannot demote invoke instructions to the stack if their normal edge - // is critical. InvokeInst &II = cast<InvokeInst>(I); - assert(II.getNormalDest()->getSinglePredecessor() && - "Cannot demote invoke with a critical successor!"); - InsertPt = II.getNormalDest()->begin(); + if (II.getNormalDest()->getSinglePredecessor()) + InsertPt = II.getNormalDest()->getFirstInsertionPt(); + else { + // We cannot demote invoke instructions to the stack if their normal edge + // is critical. 
Therefore, split the critical edge and insert the store + // in the newly created basic block. + unsigned SuccNum = GetSuccessorNumber(I.getParent(), II.getNormalDest()); + TerminatorInst *TI = &cast<TerminatorInst>(I); + assert(isCriticalEdge(TI, SuccNum) && + "Expected a critical edge!"); + BasicBlock *BB = SplitCriticalEdge(TI, SuccNum); + assert(BB && "Unable to split critical edge."); + InsertPt = BB->getFirstInsertionPt(); + } } for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 5187d7c..3cb8ded 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -418,3 +418,107 @@ bool llvm::expandDivision(BinaryOperator *Div) { return true; } + +/// Generate code to compute the remainder of two integers of bitwidth up to +/// 32 bits. Uses the above routines and extends the inputs/truncates the +/// outputs to operate in 32 bits; that is, these routines are good for targets +/// that have no or very little support for smaller than 32 bit integer +/// arithmetic. +/// +/// @brief Replace Rem with emulation code. +bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + Type *RemTy = Rem->getType(); + if (RemTy->isVectorTy()) + llvm_unreachable("Rem over vectors not supported"); + + unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); + + if (RemTyBitWidth > 32) + llvm_unreachable("Rem of bitwidth greater than 32 not supported"); + + if (RemTyBitWidth == 32) + return expandRemainder(Rem); + + // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // with 32 bit remainder computation. + IRBuilder<> Builder(Rem); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtRem; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Rem->getOpcode() == Instruction::SRem) { + ExtDividend = Builder.CreateSExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateSRem(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Rem->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Rem->getOperand(1), Int32Ty); + ExtRem = Builder.CreateURem(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtRem, RemTy); + + Rem->replaceAllUsesWith(Trunc); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + return expandRemainder(cast<BinaryOperator>(ExtRem)); +} + + +/// Generate code to divide two integers of bitwidth up to 32 bits. Uses the +/// above routines and extends the inputs/truncates the outputs to operate +/// in 32 bits; that is, these routines are good for targets that have no +/// or very little support for smaller than 32 bit integer arithmetic. +/// +/// @brief Replace Div with emulation code. 
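// A usage sketch (expandSmallRemainders is a hypothetical helper, not part of
// this patch, and it assumes the matching declarations appear in
// llvm/Transforms/Utils/IntegerDivision.h): a lowering pass would typically
// collect the narrow srem/urem instructions first and expand afterwards,
// since the expansion erases the original instruction and may rewrite the CFG.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"

static bool expandSmallRemainders(llvm::Function &F) {
  using namespace llvm;
  // First pass: gather scalar integer remainders of width <= 32.
  SmallVector<BinaryOperator *, 8> Worklist;
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I)
      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
        if ((BO->getOpcode() == Instruction::SRem ||
             BO->getOpcode() == Instruction::URem) &&
            BO->getType()->isIntegerTy() &&
            BO->getType()->getIntegerBitWidth() <= 32)
          Worklist.push_back(BO);

  // Second pass: expand each one; the pointers stay valid even though the
  // expansion may split blocks.
  bool Changed = false;
  for (unsigned i = 0, e = Worklist.size(); i != e; ++i)
    Changed |= expandRemainderUpTo32Bits(Worklist[i]);
  return Changed;
}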
+bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + Type *DivTy = Div->getType(); + if (DivTy->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); + + if (DivTyBitWidth > 32) + llvm_unreachable("Div of bitwidth greater than 32 not supported"); + + if (DivTyBitWidth == 32) + return expandDivision(Div); + + // If bitwidth smaller than 32 extend inputs, truncate output and proceed + // with 32 bit division. + IRBuilder<> Builder(Div); + + Value *ExtDividend; + Value *ExtDivisor; + Value *ExtDiv; + Value *Trunc; + Type *Int32Ty = Builder.getInt32Ty(); + + if (Div->getOpcode() == Instruction::SDiv) { + ExtDividend = Builder.CreateSExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateSExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateSDiv(ExtDividend, ExtDivisor); + } else { + ExtDividend = Builder.CreateZExt(Div->getOperand(0), Int32Ty); + ExtDivisor = Builder.CreateZExt(Div->getOperand(1), Int32Ty); + ExtDiv = Builder.CreateUDiv(ExtDividend, ExtDivisor); + } + Trunc = Builder.CreateTrunc(ExtDiv, DivTy); + + Div->replaceAllUsesWith(Trunc); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return expandDivision(cast<BinaryOperator>(ExtDiv)); +} diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index d519fb7..3716f58 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -72,13 +72,23 @@ namespace { // Rename all aliases for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE; ++AI) - AI->setName("alias"); + AI != AE; ++AI) { + StringRef Name = AI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + AI->setName("alias"); + } + // Rename all global variables for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); - GI != GE; ++GI) + GI != GE; ++GI) { + StringRef Name = GI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + GI->setName("global"); + } // Rename all struct types TypeFinder StructTypes; @@ -95,6 +105,10 @@ namespace { // Rename all functions for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { + StringRef Name = FI->getName(); + if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) + continue; + FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); runOnFunction(*FI); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index f10c35f..a63d31d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1332,149 +1332,180 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { return Changed; } -/// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 -/// and an BB2 and the only successor of BB1 is BB2, hoist simple code -/// (for now, restricted to a single instruction that's side effect free) from -/// the BB1 into the branch block to speculatively execute it. +/// \brief Speculate a conditional basic block flattening the CFG. /// -/// Turn -/// BB: -/// %t1 = icmp -/// br i1 %t1, label %BB1, label %BB2 -/// BB1: -/// %t3 = add %t2, c +/// Note that this is a very risky transform currently. Speculating +/// instructions like this is most often not desirable. 
Instead, there is an MI +/// pass which can do it with full awareness of the resource constraints. +/// However, some cases are "obvious" and we should do so directly. An example of +/// this is speculating a single, reasonably cheap instruction. +/// +/// There is only one distinct advantage to flattening the CFG at the IR level: +/// it makes very common but simplistic optimizations, such as those in +/// instcombine and the DAG combiner, more powerful by removing CFG edges and +/// modeling their effects with easier to reason about SSA value graphs. +/// +/// +/// An illustration of this transform is turning this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// br i1 %cmp, label %EndBB, label %ThenBB +/// ThenBB: +/// %sub = sub %x, %y +/// br label %EndBB -/// br label BB2 -/// BB2: -/// => -/// BB: -/// %t1 = icmp -/// %t4 = add %t2, c -/// %t3 = select i1 %t1, %t2, %t3 -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { - // Only speculatively execution a single instruction (not counting the - // terminator) for now. - Instruction *HInst = NULL; - Instruction *Term = BB1->getTerminator(); - for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); +/// EndBB: +/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ] +/// ... +/// \endcode +/// +/// Into this IR: +/// \code +/// BB: +/// %cmp = icmp ult %x, %y +/// %sub = sub %x, %y +/// %cond = select i1 %cmp, 0, %sub +/// ... +/// \endcode +/// +/// \returns true if the conditional block is removed. +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { + // Be conservative for now. FP select instruction can often be expensive. + Value *BrCond = BI->getCondition(); + if (isa<FCmpInst>(BrCond)) + return false; + + BasicBlock *BB = BI->getParent(); + BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); + + // If ThenBB is actually on the false edge of the conditional branch, remember + // to swap the select operands later. + bool Invert = false; + if (ThenBB != BI->getSuccessor(0)) { + assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); + Invert = true; + } + assert(EndBB == BI->getSuccessor(!Invert) && "No edge from 'if' block to end block"); + + // Keep a count of how many times instructions are used within ThenBB when + // they are candidates for sinking into ThenBB. Specifically: + // - They are defined in BB, and + // - They have no side effects, and + // - All of their uses are in ThenBB. + SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts; + + unsigned SpeculationCost = 0; + for (BasicBlock::iterator BBI = ThenBB->begin(), + BBE = llvm::prior(ThenBB->end()); BBI != BBE; ++BBI) { Instruction *I = BBI; // Skip debug info. - if (isa<DbgInfoIntrinsic>(I)) continue; - if (I == Term) break; + if (isa<DbgInfoIntrinsic>(I)) + continue; - if (HInst) + // Only speculatively execute a single instruction (not counting the + // terminator) for now. + ++SpeculationCost; + if (SpeculationCost > 1) return false; - HInst = I; - } - - BasicBlock *BIParent = BI->getParent(); - // Check the instruction to be hoisted, if there is one. - if (HInst) { // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(HInst)) + if (!isSafeToSpeculativelyExecute(I)) return false; - if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold) + if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold) return false; // Do not hoist the instruction if any of its operands are defined but not // used in this BB. 
The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); - if (OpI && OpI->getParent() == BIParent && - !OpI->mayHaveSideEffects() && - !OpI->isUsedInBasicBlock(BIParent)) - return false; + if (!OpI || OpI->getParent() != BB || + OpI->mayHaveSideEffects()) + continue; // Not a candidate for sinking. + + ++SinkCandidateUseCounts[OpI]; } } - // Be conservative for now. FP select instruction can often be expensive. - Value *BrCond = BI->getCondition(); - if (isa<FCmpInst>(BrCond)) - return false; - - // If BB1 is actually on the false edge of the conditional branch, remember - // to swap the select operands later. - bool Invert = false; - if (BB1 != BI->getSuccessor(0)) { - assert(BB1 == BI->getSuccessor(1) && "No edge from 'if' block?"); - Invert = true; - } + // Consider any sink candidates which are only used in ThenBB as costs for + // speculation. Note that while we iterate over a DenseMap here, we are summing + // and so iteration order isn't significant. + for (SmallDenseMap<Instruction *, unsigned, 4>::iterator I = + SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); + I != E; ++I) + if (I->first->getNumUses() == I->second) { + ++SpeculationCost; + if (SpeculationCost > 1) + return false; + } - // Collect interesting PHIs, and scan for hazards. - SmallSetVector<std::pair<Value *, Value *>, 4> PHIs; - BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0); - for (BasicBlock::iterator I = BB2->begin(); + // Check that the PHI nodes can be converted to selects. + bool HaveRewritablePHIs = false; + for (BasicBlock::iterator I = EndBB->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BIParentV = PN->getIncomingValueForBlock(BIParent); + Value *OrigV = PN->getIncomingValueForBlock(BB); + Value *ThenV = PN->getIncomingValueForBlock(ThenBB); // Skip PHIs which are trivial. - if (BB1V == BIParentV) + if (ThenV == OrigV) continue; - // Check for safety. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) { - // An unfolded ConstantExpr could end up getting expanded into - // Instructions. Don't speculate this and another instruction at - // the same time. - if (HInst) - return false; - if (!isSafeToSpeculativelyExecute(CE)) - return false; - if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) - return false; - } + HaveRewritablePHIs = true; + ConstantExpr *CE = dyn_cast<ConstantExpr>(ThenV); + if (!CE) + continue; // Known safe and cheap. + + if (!isSafeToSpeculativelyExecute(CE)) + return false; + if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold) + return false; - // Ok, we may insert a select for this PHI. - PHIs.insert(std::make_pair(BB1V, BIParentV)); + // Account for the cost of an unfolded ConstantExpr which could end up + // getting expanded into Instructions. + // FIXME: This doesn't account for how many operations are combined in the + // constant expression. + ++SpeculationCost; + if (SpeculationCost > 1) + return false; } // If there are no PHIs to process, bail early. This helps ensure idempotence // as well. - if (PHIs.empty()) + if (!HaveRewritablePHIs) return false; // If we get here, we can hoist the instruction and if-convert. 
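  // For comparison (hypothetical IR), the non-inverted case, where ThenBB sits
  // on the true edge of the branch, reads:
  // \code
  //  BB:
  //   %cmp = icmp ult %x, %y
  //   br i1 %cmp, label %ThenBB, label %EndBB
  // \endcode
  // and flattens to:
  // \code
  //  BB:
  //   %cmp = icmp ult %x, %y
  //   %sub = sub %x, %y
  //   %cond = select i1 %cmp, %sub, 0
  // \endcode
  // i.e. the speculated value occupies the select operand matching the edge
  // on which ThenBB was reached.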
- DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); + DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); - // Hoist the instruction. - if (HInst) - BIParent->getInstList().splice(BI, BB1->getInstList(), HInst); + // Hoist the instructions. + BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), + llvm::prior(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder<true, NoFolder> Builder(BI); - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { - Value *TrueV = PHIs[i].first; - Value *FalseV = PHIs[i].second; + for (BasicBlock::iterator I = EndBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned OrigI = PN->getBasicBlockIndex(BB); + unsigned ThenI = PN->getBasicBlockIndex(ThenBB); + Value *OrigV = PN->getIncomingValue(OrigI); + Value *ThenV = PN->getIncomingValue(ThenI); + + // Skip PHIs which are trivial. + if (OrigV == ThenV) + continue; // Create a select whose true value is the speculatively executed value and - // false value is the previously determined FalseV. - SelectInst *SI; + // false value is the preexisting value. Swap them if the branch + // destinations were inverted. + Value *TrueV = ThenV, *FalseV = OrigV; if (Invert) - SI = cast<SelectInst> - (Builder.CreateSelect(BrCond, FalseV, TrueV, - FalseV->getName() + "." + TrueV->getName())); - else - SI = cast<SelectInst> - (Builder.CreateSelect(BrCond, TrueV, FalseV, - TrueV->getName() + "." + FalseV->getName())); - - // Make the PHI node use the select for all incoming values for "then" and - // "if" blocks. - for (BasicBlock::iterator I = BB2->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - unsigned BB1I = PN->getBasicBlockIndex(BB1); - unsigned BIParentI = PN->getBasicBlockIndex(BIParent); - Value *BB1V = PN->getIncomingValue(BB1I); - Value *BIParentV = PN->getIncomingValue(BIParentI); - if (TrueV == BB1V && FalseV == BIParentV) { - PN->setIncomingValue(BB1I, SI); - PN->setIncomingValue(BIParentI, SI); - } - } + std::swap(TrueV, FalseV); + Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, + TrueV->getName() + "." + FalseV->getName()); + PN->setIncomingValue(OrigI, V); + PN->setIncomingValue(ThenI, V); } ++NumSpeculations; @@ -3382,7 +3413,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, ConstantInt *Offset, const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, Constant *DefaultValue, - const DataLayout *TD) { + const DataLayout *TD) + : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83c74e7..8ad566c 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -50,6 +50,10 @@ public: virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; + /// ignoreCallingConv - Returns false if this transformation could possibly + /// change the calling convention. + virtual bool ignoreCallingConv() { return false; } + Value *optimizeCall(CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, IRBuilder<> &B) { @@ -61,7 +65,7 @@ public: Context = &CI->getCalledFunction()->getContext(); // We never change the calling convention. 
- if (CI->getCallingConv() != llvm::CallingConv::C) + if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) return NULL; return callOptimizer(CI->getCalledFunction(), CI, B); @@ -724,6 +728,7 @@ struct StrNCpyOpt : public LibCallOptimization { }; struct StrLenOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || @@ -1260,6 +1265,7 @@ struct FFSOpt : public LibCallOptimization { }; struct AbsOpt : public LibCallOptimization { + virtual bool ignoreCallingConv() { return true; } virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. @@ -1883,6 +1889,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + if (CI->hasFnAttr(Attribute::NoBuiltin)) return 0; return Impl->optimizeCall(CI); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index a5e1643..b5941bd 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -63,14 +63,29 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; + if (OP == 0) continue; + Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. + if (Mapped_OP == OP || + (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + continue; // Ok, at least one operand needs remapping. SmallVector<Value*, 4> Elts; Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0); + if (Op == 0) + Elts.push_back(0); + else { + Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper); + // Use identity map if Mapped_Op is null and we can ignore missing + // entries. 
+ if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + Mapped_Op = Op; + Elts.push_back(Mapped_Op); + } } MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index d72a4a1..7636541 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -48,7 +48,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> -#include <map> using namespace llvm; static cl::opt<bool> @@ -89,6 +88,10 @@ MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group")); static cl::opt<unsigned> +MaxPairs("bb-vectorize-max-pairs-per-group", cl::init(3000), cl::Hidden, + cl::desc("The maximum number of candidate instruction pairs per group")); + +static cl::opt<unsigned> MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use" " a full cycle check")); @@ -207,11 +210,6 @@ namespace { typedef std::pair<ValuePair, size_t> ValuePairWithDepth; typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair typedef std::pair<VPPair, unsigned> VPPairWithType; - typedef std::pair<std::multimap<Value *, Value *>::iterator, - std::multimap<Value *, Value *>::iterator> VPIteratorPair; - typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator, - std::multimap<ValuePair, ValuePair>::iterator> - VPPIteratorPair; AliasAnalysis *AA; DominatorTree *DT; @@ -225,7 +223,7 @@ namespace { bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, DenseSet<ValuePair> &FixedOrderPairs, DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len); @@ -239,33 +237,36 @@ namespace { PairConnectionSplat }; - void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes); + void computeConnectedPairs( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes); void buildDepMap(BasicBlock &BB, - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &PairableInstUsers); - - void choosePairs(std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *>& ChosenPairs); + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &PairableInstUsers); + + void choosePairs(DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + 
DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *>& ChosenPairs); void fuseChosenPairs(BasicBlock &BB, - std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *>& ChosenPairs, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps); + std::vector<Value *> &PairableInsts, + DenseMap<Value *, Value *>& ChosenPairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps); bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); @@ -277,56 +278,63 @@ namespace { bool trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers = true, - std::multimap<Value *, Value *> *LoadMoveSet = 0); + DenseSet<ValuePair> *LoadMoveSetPairs = 0); - void computePairsConnectedTo( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - ValuePair P); + void computePairsConnectedTo( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + ValuePair P); bool pairsConflict(ValuePair P, ValuePair Q, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0); + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > + *PairableInstUserMap = 0, + DenseSet<VPPair> *PairableInstUserPairSet = 0); bool pairWillFormCycle(ValuePair P, - std::multimap<ValuePair, ValuePair> &PairableInstUsers, - DenseSet<ValuePair> &CurrentPairs); - - void pruneTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, - DenseSet<ValuePair> &PrunedTree, ValuePair J, - bool UseCycleCheck); - - void buildInitialTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, ValuePair J); - - void findBestTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<ValuePair> &BestTree, size_t 
&BestMaxDepth, - int &BestEffSize, VPIteratorPair ChoiceRange, - bool UseCycleCheck); + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers, + DenseSet<ValuePair> &CurrentPairs); + + void pruneDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, + DenseSet<ValuePair> &PrunedDAG, ValuePair J, + bool UseCycleCheck); + + void buildInitialDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, ValuePair J); + + void findBestDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth, + int &BestEffSize, Value *II, std::vector<Value *>&JJ, + bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o); @@ -358,20 +366,22 @@ namespace { void collectPairLoadMoveSet(BasicBlock &BB, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I); void collectLoadMoveSet(BasicBlock &BB, std::vector<Value *> &PairableInsts, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet); + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs); bool canMoveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I, Instruction *J); void moveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *&InsertionPt, Instruction *I, Instruction *J); @@ -463,18 +473,18 @@ namespace { static inline void getInstructionTypes(Instruction *I, Type *&T1, Type *&T2) { - if (isa<StoreInst>(I)) { + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // For stores, it is the value type, not the pointer type that matters // because the value is what will come from a vector register. - Value *IVal = cast<StoreInst>(I)->getValueOperand(); + Value *IVal = SI->getValueOperand(); T1 = IVal->getType(); } else { T1 = I->getType(); } - if (I->isCast()) - T2 = cast<CastInst>(I)->getSrcTy(); + if (CastInst *CI = dyn_cast<CastInst>(I)) + T2 = CI->getSrcTy(); else T2 = T1; @@ -500,7 +510,7 @@ namespace { // InsertElement and ExtractElement have a depth factor of zero. 
This is // for two reasons: First, they cannot be usefully fused. Second, because // the pass generates a lot of these, they can confuse the simple metric - // used to compare the trees in the next iteration. Thus, giving them a + // used to compare the dags in the next iteration. Thus, giving them a // weight of zero allows the pass to essentially ignore them in // subsequent iterations when looking for vectorization opportunities // while still tracking dependency chains that flow through those @@ -661,19 +671,6 @@ namespace { } } - // Returns true if J is the second element in some pair referenced by - // some multimap pair iterator pair. - template <typename V> - bool isSecondInIteratorPair(V J, std::pair< - typename std::multimap<V, V>::iterator, - typename std::multimap<V, V>::iterator> PairRange) { - for (typename std::multimap<V, V>::iterator K = PairRange.first; - K != PairRange.second; ++K) - if (K->second == J) return true; - - return false; - } - bool isPureIEChain(InsertElementInst *IE) { InsertElementInst *IENext = IE; do { @@ -698,11 +695,12 @@ namespace { DenseMap<Value *, Value *> AllChosenPairs; DenseSet<ValuePair> AllFixedOrderPairs; DenseMap<VPPair, unsigned> AllPairConnectionTypes; - std::multimap<ValuePair, ValuePair> AllConnectedPairs, AllConnectedPairDeps; + DenseMap<ValuePair, std::vector<ValuePair> > AllConnectedPairs, + AllConnectedPairDeps; do { std::vector<Value *> PairableInsts; - std::multimap<Value *, Value *> CandidatePairs; + DenseMap<Value *, std::vector<Value *> > CandidatePairs; DenseSet<ValuePair> FixedOrderPairs; DenseMap<ValuePair, int> CandidatePairCostSavings; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, @@ -711,6 +709,14 @@ namespace { PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; + // Build the candidate pair set for faster lookups. + DenseSet<ValuePair> CandidatePairsSet; + for (DenseMap<Value *, std::vector<Value *> >::iterator I = + CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I) + for (std::vector<Value *>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + CandidatePairsSet.insert(ValuePair(I->first, *J)); + // Now we have a map of all of the pairable instructions and we need to // select the best possible pairing. A good pairing is one such that the // users of the pair are also paired. This defines a (directed) forest @@ -720,30 +726,33 @@ namespace { // Note that it only matters that both members of the second pair use some // element of the first pair (to allow for splatting). 
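// A minimal sketch (hypothetical helpers, not part of this patch) of the
// lookup discipline behind the new representation: the DenseMap answers
// "what are I's candidate mates?", while the flat DenseSet answers
// "is (I, J) a candidate?" with one hash probe, replacing the old
// multimap::equal_range scans.
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Value.h"
#include <utility>
#include <vector>

typedef std::pair<llvm::Value *, llvm::Value *> ValuePair;

// O(1) membership test; with the multimap this was a linear range scan.
static bool isCandidatePair(llvm::DenseSet<ValuePair> &CandidatePairsSet,
                            llvm::Value *I, llvm::Value *J) {
  return CandidatePairsSet.count(ValuePair(I, J));
}

// Enumerating all pairs walks each key's mate vector exactly once.
static size_t countCandidatePairs(
    llvm::DenseMap<llvm::Value *, std::vector<llvm::Value *> > &CandidatePairs) {
  size_t N = 0;
  for (llvm::DenseMap<llvm::Value *, std::vector<llvm::Value *> >::iterator
         I = CandidatePairs.begin(), E = CandidatePairs.end(); I != E; ++I)
    N += I->second.size();
  return N;
}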
- std::multimap<ValuePair, ValuePair> ConnectedPairs, ConnectedPairDeps; + DenseMap<ValuePair, std::vector<ValuePair> > ConnectedPairs, + ConnectedPairDeps; DenseMap<VPPair, unsigned> PairConnectionTypes; - computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs, - PairConnectionTypes); + computeConnectedPairs(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, PairConnectionTypes); if (ConnectedPairs.empty()) continue; - for (std::multimap<ValuePair, ValuePair>::iterator + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - ConnectedPairDeps.insert(VPPair(I->second, I->first)); - } + I != IE; ++I) + for (std::vector<ValuePair>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + ConnectedPairDeps[*J].push_back(I->first); // Build the pairable-instruction dependency map DenseSet<ValuePair> PairableInstUsers; buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers); // There is now a graph of the connected pairs. For each variable, pick - // the pairing with the largest tree meeting the depth requirement on at - // least one branch. Then select all pairings that are part of that tree + // the pairing with the largest dag meeting the depth requirement on at + // least one branch. Then select all pairings that are part of that dag // and remove them from the list of available pairings and pairable // variables. DenseMap<Value *, Value *> ChosenPairs; - choosePairs(CandidatePairs, CandidatePairCostSavings, + choosePairs(CandidatePairs, CandidatePairsSet, + CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, PairableInstUsers, ChosenPairs); @@ -777,14 +786,15 @@ namespace { } } - for (std::multimap<ValuePair, ValuePair>::iterator + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = ConnectedPairs.begin(), IE = ConnectedPairs.end(); - I != IE; ++I) { - if (AllPairConnectionTypes.count(*I)) { - AllConnectedPairs.insert(*I); - AllConnectedPairDeps.insert(VPPair(I->second, I->first)); - } - } + I != IE; ++I) + for (std::vector<ValuePair>::iterator J = I->second.begin(), + JE = I->second.end(); J != JE; ++J) + if (AllPairConnectionTypes.count(VPPair(I->first, *J))) { + AllConnectedPairs[I->first].push_back(*J); + AllConnectedPairDeps[*J].push_back(I->first); + } } while (ShouldContinue); if (AllChosenPairs.empty()) return false; @@ -910,7 +920,7 @@ namespace { // This function returns true if the two provided instructions are compatible // (meaning that they can be fused into a vector instruction). This assumes // that I has already been determined to be vectorizable and that J is not - // in the use tree of I. + // in the use dag of I. 
bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, bool IsSimpleLoadStore, bool NonPow2Len, int &CostSavings, int &FixedOrder) { @@ -972,6 +982,11 @@ namespace { unsigned VCost = TTI->getMemoryOpCost(I->getOpcode(), VType, BottomAlignment, IAddressSpace); + + ICost += TTI->getAddressComputationCost(aTypeI); + JCost += TTI->getAddressComputationCost(aTypeJ); + VCost += TTI->getAddressComputationCost(VType); + if (VCost > ICost + JCost) return false; @@ -994,6 +1009,12 @@ namespace { unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2); Type *VT1 = getVecTypeForPair(IT1, JT1), *VT2 = getVecTypeForPair(IT2, JT2); + + // Note that this procedure is incorrect for insert and extract element + // instructions (because combining these often results in a shuffle), + // but this cost is ignored (because insert and extract element + // instructions are assigned a zero depth factor and are not really + // fused in general). unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2); if (VCost > ICost + JCost) @@ -1090,7 +1111,7 @@ namespace { // to contain any memory locations to which J writes. The function returns // true if J uses I. By default, alias analysis is used to determine // whether J reads from memory that overlaps with a location in WriteSet. - // If LoadMoveSet is not null, then it is a previously-computed multimap + // If LoadMoveSet is not null, then it is a previously-computed map // where the key is the memory-based user instruction and the value is // the instruction to be compared with I. So, if LoadMoveSet is provided, // then the alias analysis is not used. This is necessary because this @@ -1100,7 +1121,7 @@ namespace { bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers, - std::multimap<Value *, Value *> *LoadMoveSet) { + DenseSet<ValuePair> *LoadMoveSetPairs) { bool UsesI = false; // This instruction may already be marked as a user due, for example, to @@ -1118,9 +1139,8 @@ namespace { } } if (!UsesI && J->mayReadFromMemory()) { - if (LoadMoveSet) { - VPIteratorPair JPairRange = LoadMoveSet->equal_range(J); - UsesI = isSecondInIteratorPair<Value*>(I, JPairRange); + if (LoadMoveSetPairs) { + UsesI = LoadMoveSetPairs->count(ValuePair(J, I)); } else { for (AliasSetTracker::iterator W = WriteSet.begin(), WE = WriteSet.end(); W != WE; ++W) { @@ -1144,10 +1164,11 @@ namespace { // basic block and collects all candidate pairs for vectorization. bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, DenseSet<ValuePair> &FixedOrderPairs, DenseMap<ValuePair, int> &CandidatePairCostSavings, std::vector<Value *> &PairableInsts, bool NonPow2Len) { + size_t TotalPairs = 0; BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -1193,7 +1214,8 @@ namespace { PairableInsts.push_back(I); } - CandidatePairs.insert(ValuePair(I, J)); + CandidatePairs[I].push_back(J); + ++TotalPairs; if (TTI) CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), CostSavings)); @@ -1217,7 +1239,8 @@ namespace { // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected // during this invocation. 
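    // (TotalPairs, counted above as each candidate is recorded, feeds the new
    // second bound below: a group is now also capped by Config.MaxPairs, since
    // the number of candidate pairs, not just of pairable instructions, is
    // what the subsequent connection search scales with.)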
- if (PairableInsts.size() >= Config.MaxInsts) { + if (PairableInsts.size() >= Config.MaxInsts || + TotalPairs >= Config.MaxPairs) { ShouldContinue = true; break; } @@ -1237,11 +1260,12 @@ namespace { // it looks for pairs such that both members have an input which is an // output of PI or PJ. void BBVectorize::computePairsConnectedTo( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - ValuePair P) { + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + ValuePair P) { StoreInst *SI, *SJ; // For each possible pairing for this variable, look at the uses of @@ -1259,8 +1283,6 @@ namespace { continue; } - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - // For each use of the first variable, look for uses of the second // variable... for (Value::use_iterator J = P.second->use_begin(), @@ -1269,19 +1291,17 @@ namespace { P.second == SJ->getPointerOperand()) continue; - VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); - // Look for <I, J>: - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionDirect)); } // Look for <J, I>: - if (isSecondInIteratorPair<Value*>(*I, JPairRange)) { + if (CandidatePairsSet.count(ValuePair(*J, *I))) { VPPair VP(P, ValuePair(*J, *I)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSwap)); } } @@ -1294,9 +1314,9 @@ namespace { P.first == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1313,16 +1333,14 @@ namespace { P.second == SI->getPointerOperand()) continue; - VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); - for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { if ((SJ = dyn_cast<StoreInst>(*J)) && P.second == SJ->getPointerOperand()) continue; - if (isSecondInIteratorPair<Value*>(*J, IPairRange)) { + if (CandidatePairsSet.count(ValuePair(*I, *J))) { VPPair VP(P, ValuePair(*I, *J)); - ConnectedPairs.insert(VP); + ConnectedPairs[VP.first].push_back(VP.second); PairConnectionTypes.insert(VPPairWithType(VP, PairConnectionSplat)); } } @@ -1333,55 +1351,73 @@ namespace { // connected if some output of the first pair forms an input to both members // of the second pair. 
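  // For example (hypothetical IR): with candidate pairs P = <x0, x1> and
  // Q = <y0, y1>, if y0 = add x0, c and y1 = add x1, c then P and Q are
  // connected with type PairConnectionDirect; if instead y0 = add x1, c and
  // y1 = add x0, c the connection is PairConnectionSwap; and if y0 and y1
  // both use x0 alone, it is PairConnectionSplat.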
void BBVectorize::computeConnectedPairs( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes) { - + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes) { for (std::vector<Value *>::iterator PI = PairableInsts.begin(), PE = PairableInsts.end(); PI != PE; ++PI) { - VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI); + DenseMap<Value *, std::vector<Value *> >::iterator PP = + CandidatePairs.find(*PI); + if (PP == CandidatePairs.end()) + continue; - for (std::multimap<Value *, Value *>::iterator P = choiceRange.first; - P != choiceRange.second; ++P) - computePairsConnectedTo(CandidatePairs, PairableInsts, - ConnectedPairs, PairConnectionTypes, *P); + for (std::vector<Value *>::iterator P = PP->second.begin(), + E = PP->second.end(); P != E; ++P) + computePairsConnectedTo(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, + PairConnectionTypes, ValuePair(*PI, *P)); } - DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size() + DEBUG(size_t TotalPairs = 0; + for (DenseMap<ValuePair, std::vector<ValuePair> >::iterator I = + ConnectedPairs.begin(), IE = ConnectedPairs.end(); I != IE; ++I) + TotalPairs += I->second.size(); + dbgs() << "BBV: found " << TotalPairs << " pair connections.\n"); } // This function builds a set of use tuples such that <A, B> is in the set - // if B is in the use tree of A. If B is in the use tree of A, then B + // if B is in the use dag of A. If B is in the use dag of A, then B // depends on the output of A. void BBVectorize::buildDepMap( BasicBlock &BB, - std::multimap<Value *, Value *> &CandidatePairs, + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, std::vector<Value *> &PairableInsts, DenseSet<ValuePair> &PairableInstUsers) { DenseSet<Value *> IsInPair; - for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(), - E = CandidatePairs.end(); C != E; ++C) { + for (DenseMap<Value *, std::vector<Value *> >::iterator C = + CandidatePairs.begin(), E = CandidatePairs.end(); C != E; ++C) { IsInPair.insert(C->first); - IsInPair.insert(C->second); + IsInPair.insert(C->second.begin(), C->second.end()); } - // Iterate through the basic block, recording all Users of each + // Iterate through the basic block, recording all users of each // pairable instruction. - BasicBlock::iterator E = BB.end(); + BasicBlock::iterator E = BB.end(), EL = + BasicBlock::iterator(cast<Instruction>(PairableInsts.back())); for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { if (IsInPair.find(I) == IsInPair.end()) continue; DenseSet<Value *> Users; AliasSetTracker WriteSet(*AA); - for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) + for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) { (void) trackUsesOfI(Users, WriteSet, I, J); + if (J == EL) + break; + } + for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end(); - U != E; ++U) + U != E; ++U) { + if (IsInPair.find(*U) == IsInPair.end()) continue; PairableInstUsers.insert(ValuePair(I, *U)); + } + + if (I == EL) + break; } } @@ -1389,8 +1425,9 @@ namespace { // input of pair Q is an output of pair P. If this is the case, then these // two pairs cannot be simultaneously fused. 
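  // Concretely (hypothetical IR): with P = <a0, a1> and Q = <b0, b1>, if
  // b0 = add a0, 1 and a1 = mul b1, 2 then Q uses P and P uses Q; fusing both
  // would require each fused instruction to precede the other, so the pairs
  // conflict.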
bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> *PairableInstUserMap) { + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > *PairableInstUserMap, + DenseSet<VPPair> *PairableInstUserPairSet) { // Two pairs are in conflict if they are mutual users of each other. bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) || PairableInstUsers.count(ValuePair(P.first, Q.second)) || @@ -1403,17 +1440,14 @@ if (PairableInstUserMap) { // FIXME: The expensive part of the cycle check is not so much the cycle // check itself but this edge insertion procedure. This needs some - // profiling and probably a different data structure (same is true of - // most uses of std::multimap). + // profiling and probably a different data structure. if (PUsesQ) { - VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q); - if (!isSecondInIteratorPair(P, QPairRange)) - PairableInstUserMap->insert(VPPair(Q, P)); + if (PairableInstUserPairSet->insert(VPPair(Q, P)).second) + (*PairableInstUserMap)[Q].push_back(P); } if (QUsesP) { - VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P); - if (!isSecondInIteratorPair(Q, PPairRange)) - PairableInstUserMap->insert(VPPair(P, Q)); + if (PairableInstUserPairSet->insert(VPPair(P, Q)).second) + (*PairableInstUserMap)[P].push_back(Q); } } @@ -1423,8 +1457,8 @@ // This function walks the use graph of current pairs to see if, starting // from P, the walk returns to P. bool BBVectorize::pairWillFormCycle(ValuePair P, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseSet<ValuePair> &CurrentPairs) { + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<ValuePair> &CurrentPairs) { DEBUG(if (DebugCycleCheck) dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> " << *P.second << "\n"); @@ -1441,36 +1475,41 @@ DEBUG(if (DebugCycleCheck) dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> " << *QTop.second << "\n"); - VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop); - for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first; - C != QPairRange.second; ++C) { - if (C->second == P) { + DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = + PairableInstUserMap.find(QTop); + if (QQ == PairableInstUserMap.end()) + continue; + + for (std::vector<ValuePair>::iterator C = QQ->second.begin(), + CE = QQ->second.end(); C != CE; ++C) { + if (*C == P) { DEBUG(dbgs() << "BBV: rejected to prevent non-trivial cycle formation: " - << *C->first.first << " <-> " << *C->first.second << "\n"); + << *QTop.first << " <-> " << *QTop.second << "\n"); return true; } - if (CurrentPairs.count(C->second) && !Visited.count(C->second)) - Q.push_back(C->second); + if (CurrentPairs.count(*C) && !Visited.count(*C)) + Q.push_back(*C); } } while (!Q.empty()); return false; } - // This function builds the initial tree of connected pairs with the + // This function builds the initial dag of connected pairs with the // pair J at the root. - void BBVectorize::buildInitialTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, ValuePair J) { - // Each of these pairs is viewed as the root node of a Tree. 
The Tree + void BBVectorize::buildInitialDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, ValuePair J) { + // Each of these pairs is viewed as the root node of a DAG. The DAG // is then walked (depth-first). As this happens, we keep track of - // the pairs that compose the Tree and the maximum depth of the Tree. + // the pairs that compose the DAG and the maximum depth of the DAG. SmallVector<ValuePairWithDepth, 32> Q; // General depth-first post-order traversal: Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); @@ -1480,69 +1519,65 @@ namespace { // Push each child onto the queue: bool MoreChildren = false; size_t MaxChildDepth = QTop.second; - VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first); - for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first; - k != qtRange.second; ++k) { - // Make sure that this child pair is still a candidate: - bool IsStillCand = false; - VPIteratorPair checkRange = - CandidatePairs.equal_range(k->second.first); - for (std::multimap<Value *, Value *>::iterator m = checkRange.first; - m != checkRange.second; ++m) { - if (m->second == k->second.second) { - IsStillCand = true; - break; - } - } - - if (IsStillCand) { - DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second); - if (C == Tree.end()) { - size_t d = getDepthFactor(k->second.first); - Q.push_back(ValuePairWithDepth(k->second, QTop.second+d)); - MoreChildren = true; - } else { - MaxChildDepth = std::max(MaxChildDepth, C->second); + DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = + ConnectedPairs.find(QTop.first); + if (QQ != ConnectedPairs.end()) + for (std::vector<ValuePair>::iterator k = QQ->second.begin(), + ke = QQ->second.end(); k != ke; ++k) { + // Make sure that this child pair is still a candidate: + if (CandidatePairsSet.count(*k)) { + DenseMap<ValuePair, size_t>::iterator C = DAG.find(*k); + if (C == DAG.end()) { + size_t d = getDepthFactor(k->first); + Q.push_back(ValuePairWithDepth(*k, QTop.second+d)); + MoreChildren = true; + } else { + MaxChildDepth = std::max(MaxChildDepth, C->second); + } } } - } if (!MoreChildren) { - // Record the current pair as part of the Tree: - Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); + // Record the current pair as part of the DAG: + DAG.insert(ValuePairWithDepth(QTop.first, MaxChildDepth)); Q.pop_back(); } } while (!Q.empty()); } - // Given some initial tree, prune it by removing conflicting pairs (pairs + // Given some initial dag, prune it by removing conflicting pairs (pairs // that cannot be simultaneously chosen for vectorization). 
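  // When two children of the same node conflict, the one promising the greater
  // depth is kept (ties favor the child seen first), so the pruning is greedy
  // per node rather than globally optimal.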
- void BBVectorize::pruneTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - std::vector<Value *> &PairableInsts, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseMap<ValuePair, size_t> &Tree, - DenseSet<ValuePair> &PrunedTree, ValuePair J, - bool UseCycleCheck) { + void BBVectorize::pruneDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + std::vector<Value *> &PairableInsts, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseMap<ValuePair, size_t> &DAG, + DenseSet<ValuePair> &PrunedDAG, ValuePair J, + bool UseCycleCheck) { SmallVector<ValuePairWithDepth, 32> Q; // General depth-first post-order traversal: Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first))); do { ValuePairWithDepth QTop = Q.pop_back_val(); - PrunedTree.insert(QTop.first); + PrunedDAG.insert(QTop.first); // Visit each child, pruning as necessary... SmallVector<ValuePairWithDepth, 8> BestChildren; - VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first); - for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first; - K != QTopRange.second; ++K) { - DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second); - if (C == Tree.end()) continue; + DenseMap<ValuePair, std::vector<ValuePair> >::iterator QQ = + ConnectedPairs.find(QTop.first); + if (QQ == ConnectedPairs.end()) + continue; - // This child is in the Tree, now we need to make sure it is the + for (std::vector<ValuePair>::iterator K = QQ->second.begin(), + KE = QQ->second.end(); K != KE; ++K) { + DenseMap<ValuePair, size_t>::iterator C = DAG.find(*K); + if (C == DAG.end()) continue; + + // This child is in the DAG, now we need to make sure it is the // best of any conflicting children. There could be multiple // conflicting children, so first, determine if we're keeping // this child, then delete conflicting children as necessary. @@ -1556,7 +1591,7 @@ namespace { // fusing (a,b) we have y .. a/b .. x where y is an input // to a/b and x is an output to a/b: x and y can no longer // be legally fused. To prevent this condition, we must - // make sure that a child pair added to the Tree is not + // make sure that a child pair added to the DAG is not // both an input and output of an already-selected pair. // Pairing-induced dependencies can also form from more complicated @@ -1575,7 +1610,8 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { if (C2->second >= C->second) { CanAdd = false; break; @@ -1587,15 +1623,16 @@ namespace { if (!CanAdd) continue; // Even worse, this child could conflict with another node already - // selected for the Tree. If that is the case, ignore this child. - for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(), - E2 = PrunedTree.end(); T != E2; ++T) { + // selected for the DAG. If that is the case, ignore this child. 
+ for (DenseSet<ValuePair>::iterator T = PrunedDAG.begin(), + E2 = PrunedDAG.end(); T != E2; ++T) { if (T->first == C->first.first || T->first == C->first.second || T->second == C->first.first || T->second == C->first.second || pairsConflict(*T, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1612,7 +1649,8 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1627,7 +1665,8 @@ namespace { ChosenPairs.begin(), E2 = ChosenPairs.end(); C2 != E2; ++C2) { if (pairsConflict(*C2, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { CanAdd = false; break; } @@ -1639,7 +1678,7 @@ namespace { // To check for non-trivial cycles formed by the addition of the // current pair we've formed a list of all relevant pairs, now use a // graph walk to check for a cycle. We start from the current pair and - // walk the use tree to see if we again reach the current pair. If we + // walk the use dag to see if we again reach the current pair. If we // do, then the current pair is rejected. // FIXME: It may be more efficient to use a topological-ordering @@ -1676,34 +1715,40 @@ namespace { } while (!Q.empty()); } - // This function finds the best tree of mututally-compatible connected + // This function finds the best dag of mututally-compatible connected // pairs, given the choice of root pairs as an iterator range. 
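The cycle check described in the comments above reduces to a plain worklist walk: start from the pair under consideration, follow user edges, and reject the pair if the walk returns to its starting point. A minimal standalone sketch, with hypothetical integer node ids and std containers in place of the pass's DenseSet/DenseMap:

    #include <map>
    #include <set>
    #include <vector>

    // Returns true if, starting from Start and walking user edges, we can
    // reach Start again, i.e. adding Start would close a cycle.
    bool wouldFormCycle(int Start,
                        const std::map<int, std::vector<int>> &Users) {
      std::vector<int> Worklist{Start};
      std::set<int> Visited;
      while (!Worklist.empty()) {
        int N = Worklist.back();
        Worklist.pop_back();
        auto UI = Users.find(N);
        if (UI == Users.end())
          continue;
        for (int U : UI->second) {
          if (U == Start)
            return true;           // Walked back to where we began: cycle.
          if (Visited.insert(U).second)
            Worklist.push_back(U); // First time we see U: keep walking.
        }
      }
      return false;
    }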
- void BBVectorize::findBestTreeFor( - std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - std::multimap<ValuePair, ValuePair> &PairableInstUserMap, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth, - int &BestEffSize, VPIteratorPair ChoiceRange, - bool UseCycleCheck) { - for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first; - J != ChoiceRange.second; ++J) { + void BBVectorize::findBestDAGFor( + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUserMap, + DenseSet<VPPair> &PairableInstUserPairSet, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<ValuePair> &BestDAG, size_t &BestMaxDepth, + int &BestEffSize, Value *II, std::vector<Value *>&JJ, + bool UseCycleCheck) { + for (std::vector<Value *>::iterator J = JJ.begin(), JE = JJ.end(); + J != JE; ++J) { + ValuePair IJ(II, *J); + if (!CandidatePairsSet.count(IJ)) + continue; // Before going any further, make sure that this pair does not // conflict with any already-selected pairs (see comment below - // near the Tree pruning for more details). + // near the DAG pruning for more details). DenseSet<ValuePair> ChosenPairSet; bool DoesConflict = false; for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(), E = ChosenPairs.end(); C != E; ++C) { - if (pairsConflict(*C, *J, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0)) { + if (pairsConflict(*C, IJ, PairableInstUsers, + UseCycleCheck ? &PairableInstUserMap : 0, + UseCycleCheck ? &PairableInstUserPairSet : 0)) { DoesConflict = true; break; } @@ -1713,40 +1758,42 @@ namespace { if (DoesConflict) continue; if (UseCycleCheck && - pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet)) + pairWillFormCycle(IJ, PairableInstUserMap, ChosenPairSet)) continue; - DenseMap<ValuePair, size_t> Tree; - buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, - PairableInstUsers, ChosenPairs, Tree, *J); + DenseMap<ValuePair, size_t> DAG; + buildInitialDAGFor(CandidatePairs, CandidatePairsSet, + PairableInsts, ConnectedPairs, + PairableInstUsers, ChosenPairs, DAG, IJ); // Because we'll keep the child with the largest depth, the largest - // depth is still the same in the unpruned Tree. - size_t MaxDepth = Tree.lookup(*J); + // depth is still the same in the unpruned DAG. 
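During pruning, conflicting children of a node are resolved by depth: a child survives only if it is strictly deeper than every already-kept child it conflicts with, and ties favor the child seen first. A condensed standalone sketch of that rule (the pass applies the same comparison against the queue, the pruned DAG, and the already-chosen pairs; conflict() is a hypothetical stand-in for pairsConflict):

    #include <functional>
    #include <utility>
    #include <vector>

    using Child = std::pair<int, int>; // (node id, accumulated depth)

    void considerChild(std::vector<Child> &BestChildren, const Child &C,
                       const std::function<bool(int, int)> &conflict) {
      // Pass 1: can C be added at all? A conflicting child that is at least
      // as deep (ties favor the earlier child) blocks it.
      for (const Child &C2 : BestChildren)
        if (conflict(C2.first, C.first) && C2.second >= C.second)
          return;
      // Pass 2: C wins; drop every (strictly shallower) conflicting child.
      for (auto It = BestChildren.begin(); It != BestChildren.end();)
        It = conflict(It->first, C.first) ? BestChildren.erase(It) : It + 1;
      BestChildren.push_back(C);
    }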
+ size_t MaxDepth = DAG.lookup(IJ); - DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {" - << *J->first << " <-> " << *J->second << "} of depth " << - MaxDepth << " and size " << Tree.size() << "\n"); + DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" + << IJ.first << " <-> " << IJ.second << "} of depth " << + MaxDepth << " and size " << DAG.size() << "\n"); - // At this point the Tree has been constructed, but, may contain + // At this point the DAG has been constructed, but, may contain // contradictory children (meaning that different children of - // some tree node may be attempting to fuse the same instruction). - // So now we walk the tree again, in the case of a conflict, + // some dag node may be attempting to fuse the same instruction). + // So now we walk the dag again, in the case of a conflict, // keep only the child with the largest depth. To break a tie, // favor the first child. - DenseSet<ValuePair> PrunedTree; - pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs, - PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree, - PrunedTree, *J, UseCycleCheck); + DenseSet<ValuePair> PrunedDAG; + pruneDAGFor(CandidatePairs, PairableInsts, ConnectedPairs, + PairableInstUsers, PairableInstUserMap, + PairableInstUserPairSet, + ChosenPairs, DAG, PrunedDAG, IJ, UseCycleCheck); int EffSize = 0; if (TTI) { - DenseSet<Value *> PrunedTreeInstrs; - for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) { - PrunedTreeInstrs.insert(S->first); - PrunedTreeInstrs.insert(S->second); + DenseSet<Value *> PrunedDAGInstrs; + for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) { + PrunedDAGInstrs.insert(S->first); + PrunedDAGInstrs.insert(S->second); } // The set of pairs that have already contributed to the total cost. @@ -1759,8 +1806,8 @@ namespace { // The node weights represent the cost savings associated with // fusing the pair of instructions. - for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) { + for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) { if (!isa<ShuffleVectorInst>(S->first) && !isa<InsertElementInst>(S->first) && !isa<ExtractElementInst>(S->first)) @@ -1778,15 +1825,17 @@ namespace { // The edge weights contribute in a negative sense: they represent // the cost of shuffles. 
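The bookkeeping that follows reduces to a signed sum: each fused pair contributes its depth factor as a saving, while each cross-pair operand reordering charges a shuffle, which this patch additionally clamps for two-element vectors by the targeted SK_Broadcast/SK_Reverse costs. A minimal sketch of the accounting, with hypothetical integer costs standing in for the TargetTransformInfo queries:

    #include <algorithm>
    #include <map>
    #include <utility>
    #include <vector>

    using Pair = std::pair<int, int>;

    // Net benefit of a pruned DAG: node weights are savings, edges are
    // shuffle penalties. A DAG is only worth vectorizing if this is > 0.
    int effectiveSize(const std::vector<Pair> &PrunedDAG,
                      const std::map<Pair, int> &DepthFactor,
                      unsigned NumShuffles, int GenericShuffleCost,
                      int TargetedShuffleCost /* e.g. reverse on <2 x T> */) {
      int PerShuffle = std::min(GenericShuffleCost, TargetedShuffleCost);
      int EffSize = 0;
      for (const Pair &P : PrunedDAG)
        EffSize += DepthFactor.at(P);             // fusion savings
      EffSize -= PerShuffle * (int)NumShuffles;   // shuffle penalties
      return EffSize;
    }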
- VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S); - if (IP.first != ConnectedPairDeps.end()) { + DenseMap<ValuePair, std::vector<ValuePair> >::iterator SS = + ConnectedPairDeps.find(*S); + if (SS != ConnectedPairDeps.end()) { unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; - Q != IP.second; ++Q) { - if (!PrunedTree.count(Q->second)) + for (std::vector<ValuePair>::iterator T = SS->second.begin(), + TE = SS->second.end(); T != TE; ++T) { + VPPair Q(*S, *T); + if (!PrunedDAG.count(Q.second)) continue; DenseMap<VPPair, unsigned>::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); if (R->second == PairConnectionDirect) @@ -1802,24 +1851,35 @@ namespace { ((NumDepsSwap > NumDepsDirect) || FixedOrderPairs.count(ValuePair(S->second, S->first))); - for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; - Q != IP.second; ++Q) { - if (!PrunedTree.count(Q->second)) + for (std::vector<ValuePair>::iterator T = SS->second.begin(), + TE = SS->second.end(); T != TE; ++T) { + VPPair Q(*S, *T); + if (!PrunedDAG.count(Q.second)) continue; DenseMap<VPPair, unsigned>::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); - Type *Ty1 = Q->second.first->getType(), - *Ty2 = Q->second.second->getType(); + Type *Ty1 = Q.second.first->getType(), + *Ty2 = Q.second.second->getType(); Type *VTy = getVecTypeForPair(Ty1, Ty2); if ((R->second == PairConnectionDirect && FlipOrder) || (R->second == PairConnectionSwap && !FlipOrder) || R->second == PairConnectionSplat) { int ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy); + + if (VTy->getVectorNumElements() == 2) { + if (R->second == PairConnectionSplat) + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Broadcast, VTy)); + else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Reverse, VTy)); + } + DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << - *Q->second.first << " <-> " << *Q->second.second << + *Q.second.first << " <-> " << *Q.second.second << "} -> {" << *S->first << " <-> " << *S->second << "} = " << ESContrib << "\n"); @@ -1846,7 +1906,7 @@ namespace { } if (isa<ExtractElementInst>(*I)) continue; - if (PrunedTreeInstrs.count(*I)) + if (PrunedDAGInstrs.count(*I)) continue; NeedsExtraction = true; break; @@ -1854,10 +1914,12 @@ namespace { if (NeedsExtraction) { int ESContrib; - if (Ty1->isVectorTy()) + if (Ty1->isVectorTy()) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, Ty1, VTy); - else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_ExtractSubvector, VTy, 0, Ty1)); + } else ESContrib = (int) TTI->getVectorInstrCost( Instruction::ExtractElement, VTy, 0); @@ -1876,7 +1938,7 @@ namespace { } if (isa<ExtractElementInst>(*I)) continue; - if (PrunedTreeInstrs.count(*I)) + if (PrunedDAGInstrs.count(*I)) continue; NeedsExtraction = true; break; @@ -1884,10 +1946,13 @@ namespace { if (NeedsExtraction) { int ESContrib; - if (Ty2->isVectorTy()) + if (Ty2->isVectorTy()) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, Ty2, VTy); - else + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + 
TargetTransformInfo::SK_ExtractSubvector, VTy, + Ty1->isVectorTy() ? Ty1->getVectorNumElements() : 1, Ty2)); + } else ESContrib = (int) TTI->getVectorInstrCost( Instruction::ExtractElement, VTy, 1); DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" << @@ -1915,7 +1980,7 @@ namespace { ValuePair VPR = ValuePair(O2, O1); // Internal edges are not handled here. - if (PrunedTree.count(VP) || PrunedTree.count(VPR)) + if (PrunedDAG.count(VP) || PrunedDAG.count(VPR)) continue; Type *Ty1 = O1->getType(), @@ -1963,6 +2028,10 @@ namespace { } else if (IncomingPairs.count(VPR)) { ESContrib = (int) getInstrCost(Instruction::ShuffleVector, VTy, VTy); + + if (VTy->getVectorNumElements() == 2) + ESContrib = std::min(ESContrib, (int) TTI->getShuffleCost( + TargetTransformInfo::SK_Reverse, VTy)); } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) { ESContrib = (int) TTI->getVectorInstrCost( Instruction::InsertElement, VTy, 0); @@ -2005,27 +2074,27 @@ namespace { if (!HasNontrivialInsts) { DEBUG(if (DebugPairSelection) dbgs() << - "\tNo non-trivial instructions in tree;" + "\tNo non-trivial instructions in DAG;" " override to zero effective size\n"); EffSize = 0; } } else { - for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(), - E = PrunedTree.end(); S != E; ++S) + for (DenseSet<ValuePair>::iterator S = PrunedDAG.begin(), + E = PrunedDAG.end(); S != E; ++S) EffSize += (int) getDepthFactor(S->first); } DEBUG(if (DebugPairSelection) - dbgs() << "BBV: found pruned Tree for pair {" - << *J->first << " <-> " << *J->second << "} of depth " << - MaxDepth << " and size " << PrunedTree.size() << + dbgs() << "BBV: found pruned DAG for pair {" + << IJ.first << " <-> " << IJ.second << "} of depth " << + MaxDepth << " and size " << PrunedDAG.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || MaxDepth >= Config.ReqChainDepth) && EffSize > 0 && EffSize > BestEffSize) { BestMaxDepth = MaxDepth; BestEffSize = EffSize; - BestTree = PrunedTree; + BestDAG = PrunedDAG; } } } @@ -2033,66 +2102,98 @@ namespace { // Given the list of candidate pairs, this function selects those // that will be fused into vector instructions. 
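Selection is greedy: for each still-pairable instruction, the best DAG rooted at it is computed, its pairs are committed, and every remaining candidate that reuses one of the committed values is discarded (the chosen pair itself survives so it can participate in later roots). A standalone sketch of that loop; findBestDAGFor is a hypothetical callback here, and the linear erase scan stands in for the per-value candidate vectors that the patch introduces precisely to avoid such scans:

    #include <functional>
    #include <set>
    #include <utility>
    #include <vector>

    using Pair = std::pair<int, int>;

    void choosePairsSketch(
        std::set<Pair> &Candidates, const std::vector<int> &PairableInsts,
        std::set<Pair> &ChosenPairs,
        const std::function<std::set<Pair>(int, const std::set<Pair> &)>
            &findBestDAGFor) {
      for (int I : PairableInsts) {
        std::set<Pair> BestDAG = findBestDAGFor(I, Candidates);
        for (const Pair &S : BestDAG) {
          ChosenPairs.insert(S);
          // Discard all other candidates touching either member of S.
          for (auto It = Candidates.begin(); It != Candidates.end();) {
            bool Touches = It->first == S.first || It->first == S.second ||
                           It->second == S.first || It->second == S.second;
            if (Touches && *It != S)
              It = Candidates.erase(It);
            else
              ++It;
          }
        }
      }
    }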
void BBVectorize::choosePairs( - std::multimap<Value *, Value *> &CandidatePairs, - DenseMap<ValuePair, int> &CandidatePairCostSavings, - std::vector<Value *> &PairableInsts, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps, - DenseSet<ValuePair> &PairableInstUsers, - DenseMap<Value *, Value *>& ChosenPairs) { + DenseMap<Value *, std::vector<Value *> > &CandidatePairs, + DenseSet<ValuePair> &CandidatePairsSet, + DenseMap<ValuePair, int> &CandidatePairCostSavings, + std::vector<Value *> &PairableInsts, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps, + DenseSet<ValuePair> &PairableInstUsers, + DenseMap<Value *, Value *>& ChosenPairs) { bool UseCycleCheck = - CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; - std::multimap<ValuePair, ValuePair> PairableInstUserMap; + CandidatePairsSet.size() <= Config.MaxCandPairsForCycleCheck; + + DenseMap<Value *, std::vector<Value *> > CandidatePairs2; + for (DenseSet<ValuePair>::iterator I = CandidatePairsSet.begin(), + E = CandidatePairsSet.end(); I != E; ++I) { + std::vector<Value *> &JJ = CandidatePairs2[I->second]; + if (JJ.empty()) JJ.reserve(32); + JJ.push_back(I->first); + } + + DenseMap<ValuePair, std::vector<ValuePair> > PairableInstUserMap; + DenseSet<VPPair> PairableInstUserPairSet; for (std::vector<Value *>::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { // The number of possible pairings for this variable: - size_t NumChoices = CandidatePairs.count(*I); + size_t NumChoices = CandidatePairs.lookup(*I).size(); if (!NumChoices) continue; - VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I); + std::vector<Value *> &JJ = CandidatePairs[*I]; - // The best pair to choose and its tree: + // The best pair to choose and its dag: size_t BestMaxDepth = 0; int BestEffSize = 0; - DenseSet<ValuePair> BestTree; - findBestTreeFor(CandidatePairs, CandidatePairCostSavings, + DenseSet<ValuePair> BestDAG; + findBestDAGFor(CandidatePairs, CandidatePairsSet, + CandidatePairCostSavings, PairableInsts, FixedOrderPairs, PairConnectionTypes, ConnectedPairs, ConnectedPairDeps, - PairableInstUsers, PairableInstUserMap, ChosenPairs, - BestTree, BestMaxDepth, BestEffSize, ChoiceRange, + PairableInstUsers, PairableInstUserMap, + PairableInstUserPairSet, ChosenPairs, + BestDAG, BestMaxDepth, BestEffSize, *I, JJ, UseCycleCheck); - // A tree has been chosen (or not) at this point. If no tree was + if (BestDAG.empty()) + continue; + + // A dag has been chosen (or not) at this point. If no dag was // chosen, then this instruction, I, cannot be paired (and is no longer // considered). - DEBUG(if (BestTree.size() > 0) - dbgs() << "BBV: selected pairs in the best tree for: " - << *cast<Instruction>(*I) << "\n"); + DEBUG(dbgs() << "BBV: selected pairs in the best DAG for: " + << *cast<Instruction>(*I) << "\n"); - for (DenseSet<ValuePair>::iterator S = BestTree.begin(), - SE2 = BestTree.end(); S != SE2; ++S) { - // Insert the members of this tree into the list of chosen pairs. + for (DenseSet<ValuePair>::iterator S = BestDAG.begin(), + SE2 = BestDAG.end(); S != SE2; ++S) { + // Insert the members of this dag into the list of chosen pairs. 
ChosenPairs.insert(ValuePair(S->first, S->second)); DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " << *S->second << "\n"); - // Remove all candidate pairs that have values in the chosen tree. - for (std::multimap<Value *, Value *>::iterator K = - CandidatePairs.begin(); K != CandidatePairs.end();) { - if (K->first == S->first || K->second == S->first || - K->second == S->second || K->first == S->second) { - // Don't remove the actual pair chosen so that it can be used - // in subsequent tree selections. - if (!(K->first == S->first && K->second == S->second)) - CandidatePairs.erase(K++); - else - ++K; - } else { - ++K; - } + // Remove all candidate pairs that have values in the chosen dag. + std::vector<Value *> &KK = CandidatePairs[S->first]; + for (std::vector<Value *>::iterator K = KK.begin(), KE = KK.end(); + K != KE; ++K) { + if (*K == S->second) + continue; + + CandidatePairsSet.erase(ValuePair(S->first, *K)); + } + + std::vector<Value *> &LL = CandidatePairs2[S->second]; + for (std::vector<Value *>::iterator L = LL.begin(), LE = LL.end(); + L != LE; ++L) { + if (*L == S->first) + continue; + + CandidatePairsSet.erase(ValuePair(*L, S->second)); + } + + std::vector<Value *> &MM = CandidatePairs[S->second]; + for (std::vector<Value *>::iterator M = MM.begin(), ME = MM.end(); + M != ME; ++M) { + assert(*M != S->first && "Flipped pair in candidate list?"); + CandidatePairsSet.erase(ValuePair(S->second, *M)); + } + + std::vector<Value *> &NN = CandidatePairs2[S->first]; + for (std::vector<Value *>::iterator N = NN.begin(), NE = NN.end(); + N != NE; ++N) { + assert(*N != S->second && "Flipped pair in candidate list?"); + CandidatePairsSet.erase(ValuePair(*N, S->first)); } } } @@ -2696,7 +2797,7 @@ namespace { // Move all uses of the function I (including pairing-induced uses) after J. bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I, Instruction *J) { // Skip to the first instruction past I. BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); @@ -2704,18 +2805,18 @@ namespace { DenseSet<Value *> Users; AliasSetTracker WriteSet(*AA); for (; cast<Instruction>(L) != J; ++L) - (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet); + (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); assert(cast<Instruction>(L) == J && "Tracking has not proceeded far enough to check for dependencies"); // If J is now in the use set of I, then trackUsesOfI will return true // and we have a dependency cycle (and the fusing operation must abort). - return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet); + return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSetPairs); } // Move all uses of the function I (including pairing-induced uses) after J. void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *&InsertionPt, Instruction *I, Instruction *J) { // Skip to the first instruction past I. @@ -2724,7 +2825,7 @@ namespace { DenseSet<Value *> Users; AliasSetTracker WriteSet(*AA); for (; cast<Instruction>(L) != J;) { - if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) { + if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { // Move this instruction Instruction *InstToMove = L; ++L; @@ -2744,7 +2845,8 @@ namespace { // to be moved after J (the second instruction) when the pair is fused. 
void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet, + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs, Instruction *I) { // Skip to the first instruction past I. BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I)); @@ -2757,8 +2859,10 @@ namespace { // could be before I if this is an inverted input. for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) { if (trackUsesOfI(Users, WriteSet, I, L)) { - if (L->mayReadFromMemory()) - LoadMoveSet.insert(ValuePair(L, I)); + if (L->mayReadFromMemory()) { + LoadMoveSet[L].push_back(I); + LoadMoveSetPairs.insert(ValuePair(L, I)); + } } } } @@ -2767,20 +2871,22 @@ namespace { // are chosen for vectorization, we can end up in a situation where the // aliasing analysis starts returning different query results as the // process of fusing instruction pairs continues. Because the algorithm - // relies on finding the same use trees here as were found earlier, we'll + // relies on finding the same use dags here as were found earlier, we'll // need to precompute the necessary aliasing information here and then // manually update it during the fusion process. void BBVectorize::collectLoadMoveSet(BasicBlock &BB, std::vector<Value *> &PairableInsts, DenseMap<Value *, Value *> &ChosenPairs, - std::multimap<Value *, Value *> &LoadMoveSet) { + DenseMap<Value *, std::vector<Value *> > &LoadMoveSet, + DenseSet<ValuePair> &LoadMoveSetPairs) { for (std::vector<Value *>::iterator PI = PairableInsts.begin(), PIE = PairableInsts.end(); PI != PIE; ++PI) { DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI); if (P == ChosenPairs.end()) continue; Instruction *I = cast<Instruction>(P->first); - collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I); + collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, + LoadMoveSetPairs, I); } } @@ -2816,12 +2922,12 @@ namespace { // because the vector instruction is inserted in the location of the pair's // second member). 
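A pattern worth noting throughout this patch: each former multimap becomes a vector-valued map for cheap iteration plus a companion set of flattened pairs for constant-time membership tests (CandidatePairs/CandidatePairsSet, PairableInstUserMap/PairableInstUserPairSet, LoadMoveSet/LoadMoveSetPairs). A minimal sketch of the shape, using std containers and hypothetical integer value ids:

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    using ValueId = int;
    using ValuePair = std::pair<ValueId, ValueId>;

    // Two views of one relation: a per-key vector for iteration and a flat
    // pair set for constant-time membership queries.
    struct PairIndex {
      std::map<ValueId, std::vector<ValueId>> ByKey;
      std::set<ValuePair> Pairs;

      void record(ValueId A, ValueId B) {
        ByKey[A].push_back(B);
        Pairs.insert({A, B});
      }
      bool contains(ValueId A, ValueId B) const {
        return Pairs.count({A, B}) != 0;
      }
    };

Both views describe one relation, so they must be updated in lockstep, which is exactly what the fusion code below does for LoadMoveSet and LoadMoveSetPairs.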
void BBVectorize::fuseChosenPairs(BasicBlock &BB, - std::vector<Value *> &PairableInsts, - DenseMap<Value *, Value *> &ChosenPairs, - DenseSet<ValuePair> &FixedOrderPairs, - DenseMap<VPPair, unsigned> &PairConnectionTypes, - std::multimap<ValuePair, ValuePair> &ConnectedPairs, - std::multimap<ValuePair, ValuePair> &ConnectedPairDeps) { + std::vector<Value *> &PairableInsts, + DenseMap<Value *, Value *> &ChosenPairs, + DenseSet<ValuePair> &FixedOrderPairs, + DenseMap<VPPair, unsigned> &PairConnectionTypes, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairs, + DenseMap<ValuePair, std::vector<ValuePair> > &ConnectedPairDeps) { LLVMContext& Context = BB.getContext(); // During the vectorization process, the order of the pairs to be fused @@ -2835,8 +2941,10 @@ namespace { E = FlippedPairs.end(); P != E; ++P) ChosenPairs.insert(*P); - std::multimap<Value *, Value *> LoadMoveSet; - collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); + DenseMap<Value *, std::vector<Value *> > LoadMoveSet; + DenseSet<ValuePair> LoadMoveSetPairs; + collectLoadMoveSet(BB, PairableInsts, ChosenPairs, + LoadMoveSet, LoadMoveSetPairs); DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); @@ -2868,7 +2976,7 @@ namespace { ChosenPairs.erase(FP); ChosenPairs.erase(P); - if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) { + if (!canMoveUsesOfIAfterJ(BB, LoadMoveSetPairs, I, J)) { DEBUG(dbgs() << "BBV: fusion of: " << *I << " <-> " << *J << " aborted because of non-trivial dependency cycle\n"); @@ -2885,18 +2993,20 @@ namespace { // of dependencies connected via swaps, and those directly connected, // and flip the order if the number of swaps is greater. bool OrigOrder = true; - VPPIteratorPair IP = ConnectedPairDeps.equal_range(ValuePair(I, J)); - if (IP.first == ConnectedPairDeps.end()) { - IP = ConnectedPairDeps.equal_range(ValuePair(J, I)); + DenseMap<ValuePair, std::vector<ValuePair> >::iterator IJ = + ConnectedPairDeps.find(ValuePair(I, J)); + if (IJ == ConnectedPairDeps.end()) { + IJ = ConnectedPairDeps.find(ValuePair(J, I)); OrigOrder = false; } - if (IP.first != ConnectedPairDeps.end()) { + if (IJ != ConnectedPairDeps.end()) { unsigned NumDepsDirect = 0, NumDepsSwap = 0; - for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; - Q != IP.second; ++Q) { + for (std::vector<ValuePair>::iterator T = IJ->second.begin(), + TE = IJ->second.end(); T != TE; ++T) { + VPPair Q(IJ->first, *T); DenseMap<VPPair, unsigned>::iterator R = - PairConnectionTypes.find(VPPair(Q->second, Q->first)); + PairConnectionTypes.find(VPPair(Q.second, Q.first)); assert(R != PairConnectionTypes.end() && "Cannot find pair connection type"); if (R->second == PairConnectionDirect) @@ -2922,17 +3032,20 @@ namespace { // If the pair being fused uses the opposite order from that in the pair // connection map, then we need to flip the types. 
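Flipping a fused pair inverts how its consumers see its lanes, so every recorded connection type has to be toggled; splat connections involve a single repeated value and are direction-independent. In isolation (a sketch mirroring the enum used by the pass):

    enum PairConnectionType {
      PairConnectionDirect, // operands line up with the producer's order
      PairConnectionSwap,   // operands arrive in the opposite order
      PairConnectionSplat   // connection through a single repeated value
    };

    // Emitting a pair in the opposite order turns direct connections into
    // swaps and swaps into direct connections; splats are unaffected.
    PairConnectionType flipConnection(PairConnectionType T) {
      if (T == PairConnectionDirect) return PairConnectionSwap;
      if (T == PairConnectionSwap) return PairConnectionDirect;
      return T;
    }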
- VPPIteratorPair IP = ConnectedPairs.equal_range(ValuePair(H, L)); - for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first; - Q != IP.second; ++Q) { - DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(*Q); - assert(R != PairConnectionTypes.end() && - "Cannot find pair connection type"); - if (R->second == PairConnectionDirect) - R->second = PairConnectionSwap; - else if (R->second == PairConnectionSwap) - R->second = PairConnectionDirect; - } + DenseMap<ValuePair, std::vector<ValuePair> >::iterator HL = + ConnectedPairs.find(ValuePair(H, L)); + if (HL != ConnectedPairs.end()) + for (std::vector<ValuePair>::iterator T = HL->second.begin(), + TE = HL->second.end(); T != TE; ++T) { + VPPair Q(HL->first, *T); + DenseMap<VPPair, unsigned>::iterator R = PairConnectionTypes.find(Q); + assert(R != PairConnectionTypes.end() && + "Cannot find pair connection type"); + if (R->second == PairConnectionDirect) + R->second = PairConnectionSwap; + else if (R->second == PairConnectionSwap) + R->second = PairConnectionDirect; + } bool LBeforeH = !FlipPairOrder; unsigned NumOperands = I->getNumOperands(); @@ -2964,12 +3077,12 @@ namespace { Instruction *K1 = 0, *K2 = 0; replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); - // The use tree of the first original instruction must be moved to after - // the location of the second instruction. The entire use tree of the - // first instruction is disjoint from the input tree of the second + // The use dag of the first original instruction must be moved to after + // the location of the second instruction. The entire use dag of the + // first instruction is disjoint from the input dag of the second // (by definition), and so commutes with it. - moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J); + moveUsesOfIAfterJ(BB, LoadMoveSetPairs, InsertionPt, I, J); if (!isa<StoreInst>(I)) { L->replaceAllUsesWith(K1); @@ -2986,17 +3099,23 @@ namespace { // yet-to-be-fused pair. The loads in question are the keys of the map. if (I->mayReadFromMemory()) { std::vector<ValuePair> NewSetMembers; - VPIteratorPair IPairRange = LoadMoveSet.equal_range(I); - VPIteratorPair JPairRange = LoadMoveSet.equal_range(J); - for (std::multimap<Value *, Value *>::iterator N = IPairRange.first; - N != IPairRange.second; ++N) - NewSetMembers.push_back(ValuePair(K, N->second)); - for (std::multimap<Value *, Value *>::iterator N = JPairRange.first; - N != JPairRange.second; ++N) - NewSetMembers.push_back(ValuePair(K, N->second)); + DenseMap<Value *, std::vector<Value *> >::iterator II = + LoadMoveSet.find(I); + if (II != LoadMoveSet.end()) + for (std::vector<Value *>::iterator N = II->second.begin(), + NE = II->second.end(); N != NE; ++N) + NewSetMembers.push_back(ValuePair(K, *N)); + DenseMap<Value *, std::vector<Value *> >::iterator JJ = + LoadMoveSet.find(J); + if (JJ != LoadMoveSet.end()) + for (std::vector<Value *>::iterator N = JJ->second.begin(), + NE = JJ->second.end(); N != NE; ++N) + NewSetMembers.push_back(ValuePair(K, *N)); for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(), - AE = NewSetMembers.end(); A != AE; ++A) - LoadMoveSet.insert(*A); + AE = NewSetMembers.end(); A != AE; ++A) { + LoadMoveSet[A->first].push_back(A->second); + LoadMoveSetPairs.insert(*A); + } } // Before removing I, set the iterator to the next instruction. 
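When I and J fuse into the new load K, the precomputed move-together records of both originals must be re-keyed to K, in the map view shown here as well as in the flat pair-set view. A reduced sketch with hypothetical integer ids:

    #include <map>
    #include <vector>

    using ValueId = int;

    // Re-key the move-together records of the fused loads I and J to the
    // new combined load K.
    void rekeyLoadMoveSet(
        std::map<ValueId, std::vector<ValueId>> &LoadMoveSet,
        ValueId I, ValueId J, ValueId K) {
      std::vector<ValueId> NewSetMembers;
      auto II = LoadMoveSet.find(I);
      if (II != LoadMoveSet.end())
        NewSetMembers.insert(NewSetMembers.end(), II->second.begin(),
                             II->second.end());
      auto JJ = LoadMoveSet.find(J);
      if (JJ != LoadMoveSet.end())
        NewSetMembers.insert(NewSetMembers.end(), JJ->second.begin(),
                             JJ->second.end());
      LoadMoveSet[K] = NewSetMembers; // every old dependent, now keyed by K
    }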
@@ -3056,6 +3175,7 @@ VectorizeConfig::VectorizeConfig() { MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; SplatBreaksChain = ::SplatBreaksChain; MaxInsts = ::MaxInsts; + MaxPairs = ::MaxPairs; MaxIter = ::MaxIter; Pow2LenOnly = ::Pow2LenOnly; NoMemOpBoost = ::NoMemOpBoost; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9c82cb8..f489393 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9,10 +9,10 @@ // // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops // and generates target-independent LLVM-IR. Legalization of the IR is done -// in the codegen. However, the vectorizes uses (will use) the codegen +// in the codegen. However, the vectorizer uses (will use) the codegen // interfaces to generate IR that is likely to result in an optimal binary. // -// The loop vectorizer combines consecutive loop iteration into a single +// The loop vectorizer combines consecutive loop iterations into a single // 'wide' iteration. After this transformation the index is incremented // by the SIMD vector width, and not by one. // @@ -32,7 +32,7 @@ // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. // // Variable uniformity checks are inspired by: -// Karrenberg, R. and Hack, S. Whole Function Vectorization. +// Karrenberg, R. and Hack, S. Whole Function Vectorization. // // Other ideas/concepts are from: // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. @@ -101,24 +101,20 @@ EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); /// We don't vectorize loops with a known constant trip count below this number. -static const unsigned TinyTripCountVectorThreshold = 16; +static cl::opt<unsigned> +TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), + cl::Hidden, + cl::desc("Don't vectorize loops with a constant " + "trip count that is smaller than this " + "value.")); /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// We don't unroll loops that are larget than this threshold. -static const unsigned MaxLoopSizeThreshold = 32; - /// When performing a runtime memory check, do not check more than this /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; -/// This is the highest vector width that we try to generate. -static const unsigned MaxVectorSize = 8; - -/// This is the highest Unroll Factor. -static const unsigned MaxUnrollSize = 4; - namespace { // Forward declarations. @@ -169,8 +165,8 @@ private: /// Add code that checks at runtime if the accessed arrays overlap. /// Returns the comparator value or NULL if no check is needed. - Value *addRuntimeCheck(LoopVectorizationLegality *Legal, - Instruction *Loc); + Instruction *addRuntimeCheck(LoopVectorizationLegality *Legal, + Instruction *Loc); /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(LoopVectorizationLegality *Legal); /// Copy and widen the instructions from the old loop. @@ -196,6 +192,10 @@ private: /// of scalars. void scalarizeInstruction(Instruction *Instr); + /// Vectorize Load and Store instructions, + void vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality *Legal); + /// Create a broadcast instruction. 
This method generates a broadcast /// instruction (shuffle) for loop invariant values and for the induction /// value. If this is the induction variable then we extend it to N, N+1, ... @@ -228,31 +228,34 @@ private: ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {} /// \return True if 'Key' is saved in the Value Map. - bool has(Value *Key) { return MapStoreage.count(Key); } + bool has(Value *Key) const { return MapStorage.count(Key); } /// Initializes a new entry in the map. Sets all of the vector parts to the /// save value in 'Val'. /// \return A reference to a vector with splat values. VectorParts &splat(Value *Key, Value *Val) { - MapStoreage[Key].clear(); - MapStoreage[Key].append(UF, Val); - return MapStoreage[Key]; + VectorParts &Entry = MapStorage[Key]; + Entry.assign(UF, Val); + return Entry; } ///\return A reference to the value that is stored at 'Key'. VectorParts &get(Value *Key) { - if (!has(Key)) - MapStoreage[Key].resize(UF); - return MapStoreage[Key]; + VectorParts &Entry = MapStorage[Key]; + if (Entry.empty()) + Entry.resize(UF); + assert(Entry.size() == UF); + return Entry; } + private: /// The unroll factor. Each entry in the map stores this number of vector /// elements. unsigned UF; /// Map storage. We use std::map and not DenseMap because insertions to a /// dense map invalidates its iterators. - std::map<Value*, VectorParts> MapStoreage; + std::map<Value *, VectorParts> MapStorage; }; /// The original loop. @@ -289,8 +292,8 @@ private: BasicBlock *LoopVectorBody; ///The scalar loop body. BasicBlock *LoopScalarBody; - ///The first bypass block. - BasicBlock *LoopBypassBlock; + /// A list of all bypass blocks. The first block is the entry of the loop. + SmallVector<BasicBlock *, 4> LoopBypassBlocks; /// The new Induction variable which was added to the new block. PHINode *Induction; @@ -316,8 +319,9 @@ private: class LoopVectorizationLegality { public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL, - DominatorTree *DT) - : TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {} + DominatorTree *DT, TargetTransformInfo* TTI, + AliasAnalysis* AA) + : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -336,7 +340,8 @@ public: IK_NoInduction, ///< Not an induction variable. IK_IntInduction, ///< Integer induction variable. Step = 1. IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - IK_PtrInduction ///< Pointer induction variable. Step = sizeof(elem). + IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem). + IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). }; /// This POD struct holds information about reduction variables. @@ -400,6 +405,11 @@ public: /// induction descriptor. typedef MapVector<PHINode*, InductionInfo> InductionList; + /// Alias(Multi)Map stores the values (GEPs or underlying objects and their + /// respective Store/Load instruction(s) to calculate aliasing. + typedef DenseMap<Value*, Instruction* > AliasMap; + typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap; + /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. @@ -473,6 +483,14 @@ private: InductionKind isInductionVariable(PHINode *Phi); /// Return true if can compute the address bounds of Ptr within the loop. 
bool hasComputableBounds(Value *Ptr); + /// Return true if there is the chance of write reorder. + bool hasPossibleGlobalWriteReorder(Value *Object, + Instruction *Inst, + AliasMultiMap &WriteObjects, + unsigned MaxByteWidth); + /// Return the AA location for a load or a store. + AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst); + /// The loop that we evaluate. Loop *TheLoop; @@ -480,8 +498,12 @@ private: ScalarEvolution *SE; /// DataLayout analysis. DataLayout *DL; - // Dominators. + /// Dominators. DominatorTree *DT; + /// Target Info. + TargetTransformInfo *TTI; + /// Alias Analysis. + AliasAnalysis *AA; // --- vectorization state --- // @@ -517,20 +539,34 @@ class LoopVectorizationCostModel { public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, - const TargetTransformInfo &TTI) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI) {} - - /// \return The most profitable vectorization factor. + const TargetTransformInfo &TTI, + DataLayout *DL) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {} + + /// Information about vectorization costs + struct VectorizationFactor { + unsigned Width; // Vector width with best cost + unsigned Cost; // Cost of the loop with that width + }; + /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to VF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is /// possible. - unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF); + VectorizationFactor selectVectorizationFactor(bool OptForSize, + unsigned UserVF); + /// \return The size (in bits) of the widest type in the code that + /// needs to be vectorized. We ignore values that remain scalar such as + /// 64 bit loop indices. + unsigned getWidestType(); /// \return The most profitable unroll factor. /// If UserUF is non-zero then this method finds the best unroll-factor /// based on register pressure and other parameters. - unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF); + /// VF and LoopCost are the selected vectorization factor and the cost of the + /// selected VF. + unsigned selectUnrollFactor(bool OptForSize, unsigned UserUF, unsigned VF, + unsigned LoopCost); /// \brief A struct that represents some properties of the register usage /// of a loop. @@ -562,6 +598,10 @@ private: /// the scalar type. static Type* ToVectorTy(Type *Scalar, unsigned VF); + /// Returns whether the instruction is a load or store and will be a emitted + /// as a vector operation. + bool isConsecutiveLoadOrStore(Instruction *I); + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -572,6 +612,8 @@ private: LoopVectorizationLegality *Legal; /// Vector target information. const TargetTransformInfo &TTI; + /// Target data layout information. + DataLayout *DL; }; /// The LoopVectorize Pass. @@ -588,6 +630,7 @@ struct LoopVectorize : public LoopPass { LoopInfo *LI; TargetTransformInfo *TTI; DominatorTree *DT; + AliasAnalysis *AA; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. @@ -599,21 +642,22 @@ struct LoopVectorize : public LoopPass { LI = &getAnalysis<LoopInfo>(); TTI = &getAnalysis<TargetTransformInfo>(); DT = &getAnalysis<DominatorTree>(); + AA = getAnalysisIfAvailable<AliasAnalysis>(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. 
- LoopVectorizationLegality LVL(L, SE, DL, DT); + LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); - // Check the function attribues to find out if this function should be + // Check the function attributes to find out if this function should be // optimized for size. Function *F = L->getHeader()->getParent(); Attribute::AttrKind SzAttr = Attribute::OptimizeForSize; @@ -628,20 +672,24 @@ struct LoopVectorize : public LoopPass { return false; } - unsigned VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); - unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll); + // Select the optimal vectorization factor. + LoopVectorizationCostModel::VectorizationFactor VF; + VF = CM.selectVectorizationFactor(OptForSize, VectorizationFactor); + // Select the unroll factor. + unsigned UF = CM.selectUnrollFactor(OptForSize, VectorizationUnroll, + VF.Width, VF.Cost); - if (VF == 1) { + if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); return false; } - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<< + DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< F->getParent()->getModuleIdentifier()<<"\n"); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); - // If we decided that it is *legal* to vectorizer the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF, UF); + // If we decided that it is *legal* to vectorize the loop then do it. + InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -730,6 +778,9 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); + // Make sure that the pointer does not point to structs. + if (cast<PointerType>(Ptr->getType())->getElementType()->isAggregateType()) + return 0; // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr); @@ -737,6 +788,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { InductionInfo II = Inductions[Phi]; if (IK_PtrInduction == II.IK) return 1; + else if (IK_ReversePtrInduction == II.IK) + return -1; } GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr); @@ -746,6 +799,29 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { unsigned NumOperands = Gep->getNumOperands(); Value *LastIndex = Gep->getOperand(NumOperands - 1); + Value *GpPtr = Gep->getPointerOperand(); + // If this GEP value is a consecutive pointer induction variable and all of + // the indices are constant then we know it is consecutive. We can + Phi = dyn_cast<PHINode>(GpPtr); + if (Phi && Inductions.count(Phi)) { + + // Make sure that the pointer does not point to structs. + PointerType *GepPtrType = cast<PointerType>(GpPtr->getType()); + if (GepPtrType->getElementType()->isAggregateType()) + return 0; + + // Make sure that all of the index operands are loop invariant. 
+ for (unsigned i = 1; i < NumOperands; ++i) + if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + return 0; + + InductionInfo II = Inductions[Phi]; + if (IK_PtrInduction == II.IK) + return 1; + else if (IK_ReversePtrInduction == II.IK) + return -1; + } + // Check that all of the gep indices are uniform except for the last. for (unsigned i = 0; i < NumOperands - 1; ++i) if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) @@ -784,8 +860,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) { // If this scalar is unknown, assume that it is a constant or that it is // loop invariant. Broadcast V and save the value for future uses. Value *B = getBroadcastInstrs(V); - WidenMap.splat(V, B); - return WidenMap.get(V); + return WidenMap.splat(V, B); } Value *InnerLoopVectorizer::reverseVector(Value *Vec) { @@ -799,6 +874,111 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) { "reverse"); } + +void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, + LoopVectorizationLegality *Legal) { + // Attempt to issue a wide load. + LoadInst *LI = dyn_cast<LoadInst>(Instr); + StoreInst *SI = dyn_cast<StoreInst>(Instr); + + assert((LI || SI) && "Invalid Load/Store instruction"); + + Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType(); + Type *DataTy = VectorType::get(ScalarDataTy, VF); + Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); + unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment(); + + // If the pointer is loop invariant or if it is non consecutive, + // scalarize the load. + int Stride = Legal->isConsecutivePtr(Ptr); + bool Reverse = Stride < 0; + bool UniformLoad = LI && Legal->isUniform(Ptr); + if (Stride == 0 || UniformLoad) + return scalarizeInstruction(Instr); + + Constant *Zero = Builder.getInt32(0); + VectorParts &Entry = WidenMap.get(Instr); + + // Handle consecutive loads/stores. + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); + if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { + Value *PtrOperand = Gep->getPointerOperand(); + Value *FirstBasePtr = getVectorValue(PtrOperand)[0]; + FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(0, FirstBasePtr); + Gep2->setName("gep.indvar.base"); + Ptr = Builder.Insert(Gep2); + } else if (Gep) { + assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), + OrigLoop) && "Base ptr must be invariant"); + + // The last index does not have to be the induction. It can be + // consecutive and be a function of the index. For example A[I+1]; + unsigned NumOperands = Gep->getNumOperands(); + + Value *LastGepOperand = Gep->getOperand(NumOperands - 1); + VectorParts &GEPParts = getVectorValue(LastGepOperand); + Value *LastIndex = GEPParts[0]; + LastIndex = Builder.CreateExtractElement(LastIndex, Zero); + + // Create the new GEP with the new induction variable. + GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); + Gep2->setOperand(NumOperands - 1, LastIndex); + Gep2->setName("gep.indvar.idx"); + Ptr = Builder.Insert(Gep2); + } else { + // Use the induction element ptr. 
+ assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); + VectorParts &PtrVal = getVectorValue(Ptr); + Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); + } + + // Handle Stores: + if (SI) { + assert(!Legal->isUniform(SI->getPointerOperand()) && + "We do not allow storing to uniform addresses"); + + VectorParts &StoredVal = getVectorValue(SI->getValueOperand()); + for (unsigned Part = 0; Part < UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + + if (Reverse) { + // If we store to reverse consecutive memory locations then we need + // to reverse the order of elements in the stored value. + StoredVal[Part] = reverseVector(StoredVal[Part]); + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. + PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + } + + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); + } + } + + for (unsigned Part = 0; Part < UF; ++Part) { + // Calculate the pointer for the specific unroll-part. + Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + + if (Reverse) { + // If the address is consecutive but reversed, then the + // wide store needs to start at the last vector element. + PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + } + + Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo()); + Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); + cast<LoadInst>(LI)->setAlignment(Alignment); + Entry[Part] = Reverse ? reverseVector(LI) : LI; + } +} + void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. 
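The pointer arithmetic for reversed accesses above is easy to misread: each unroll part of a reversed access must begin at the last element it covers. The offsets reduce to the following element-index math (a sketch only; the real code emits GEPs):

    #include <cstdint>

    // Offset (in elements) of the first lane touched by one unroll part.
    // Forward:  Part * VF past the base pointer.
    // Reverse:  -Part * VF, then back up another VF - 1 elements so the
    //           wide access starts at the last element it covers.
    std::int64_t partStartOffset(unsigned Part, unsigned VF, bool Reverse) {
      std::int64_t P = Part, W = VF;
      if (!Reverse)
        return P * W;
      return -P * W + (1 - W);
    }

For VF = 4 and UF = 2 this gives forward starts {0, 4} and reverse starts {-3, -7}, matching the CreateGEP(Ptr, -Part * VF) followed by CreateGEP(PartPtr, 1 - VF) sequence above.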
@@ -870,7 +1050,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { } } -Value* +Instruction * InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, Instruction *Loc) { LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = @@ -879,7 +1059,7 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, if (!PtrRtCheck->Need) return NULL; - Value *MemoryRuntimeCheck = 0; + Instruction *MemoryRuntimeCheck = 0; unsigned NumPointers = PtrRtCheck->Pointers.size(); SmallVector<Value* , 2> Starts; SmallVector<Value* , 2> Ends; @@ -908,28 +1088,23 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal, } } + IRBuilder<> ChkBuilder(Loc); + for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { - Instruction::CastOps Op = Instruction::BitCast; - Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc); - Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc); - Value *End0 = CastInst::Create(Op, Ends[i], PtrArithTy, "bc", Loc); - Value *End1 = CastInst::Create(Op, Ends[j], PtrArithTy, "bc", Loc); - - Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Start0, End1, "bound0", Loc); - Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE, - Start1, End0, "bound1", Loc); - Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1, - "found.conflict", Loc); + Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy, "bc"); + + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); if (MemoryRuntimeCheck) - MemoryRuntimeCheck = BinaryOperator::Create(Instruction::Or, - MemoryRuntimeCheck, - IsConflict, - "conflict.rdx", Loc); - else - MemoryRuntimeCheck = IsConflict; + IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, + "conflict.rdx"); + MemoryRuntimeCheck = cast<Instruction>(IsConflict); } } @@ -943,7 +1118,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass. + [ ] <-- vector loop bypass (may consist of multiple blocks). / | / v | [ ] <-- vector pre header. @@ -1004,10 +1179,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { ConstantInt::get(IdxTy, 0); assert(BypassBlock && "Invalid loop structure"); - - // Generate the code that checks in runtime if arrays overlap. - Value *MemoryRuntimeCheck = addRuntimeCheck(Legal, - BypassBlock->getTerminator()); + LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. BasicBlock *VectorPH = @@ -1019,10 +1191,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ScalarPH = MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph"); - // This is the location in which we add all of the logic for bypassing - // the new vector loop. - Instruction *Loc = BypassBlock->getTerminator(); - // Use this IR builder to create the loop instructions (Phi, Br, Cmp) // inside the loop. 
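The runtime memory check assembled through ChkBuilder above is the classic quadratic interval-overlap test: two pointer ranges conflict exactly when each starts no later than the other ends, and the per-pair results are OR-reduced into a single guard. The same logic as straight-line C++ over byte ranges (a sketch; the pass emits this as IR):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct PtrRange { std::uintptr_t Start, End; }; // inclusive bounds

    // Pairwise overlap test, OR-reduced exactly like "conflict.rdx".
    bool anyRangesConflict(const std::vector<PtrRange> &Ptrs) {
      bool MemoryRuntimeCheck = false;
      for (std::size_t i = 0; i < Ptrs.size(); ++i)
        for (std::size_t j = i + 1; j < Ptrs.size(); ++j) {
          bool Bound0 = Ptrs[i].Start <= Ptrs[j].End; // "bound0"
          bool Bound1 = Ptrs[j].Start <= Ptrs[i].End; // "bound1"
          MemoryRuntimeCheck |= (Bound0 && Bound1);   // "found.conflict"
        }
      return MemoryRuntimeCheck;
    }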
Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); @@ -1033,42 +1201,62 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // times the unroll factor (num of SIMD instructions). Constant *Step = ConstantInt::get(IdxTy, VF * UF); + // This is the IR builder that we use to add all of the logic for bypassing + // the new vector loop. + IRBuilder<> BypassBuilder(BypassBlock->getTerminator()); + // We may need to extend the index in case there is a type mismatch. // We know that the count starts at zero and does not overflow. if (Count->getType() != IdxTy) { // The exit count can be of pointer type. Convert it to the correct // integer type. if (ExitCount->getType()->isPointerTy()) - Count = CastInst::CreatePointerCast(Count, IdxTy, "ptrcnt.to.int", Loc); + Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int"); else - Count = CastInst::CreateZExtOrBitCast(Count, IdxTy, "zext.cnt", Loc); + Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast"); } // Add the start index to the loop count to get the new end index. - Value *IdxEnd = BinaryOperator::CreateAdd(Count, StartIdx, "end.idx", Loc); + Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx"); // Now we need to generate the expression for N - (N % VF), which is // the part that the vectorized body will execute. - Value *R = BinaryOperator::CreateURem(Count, Step, "n.mod.vf", Loc); - Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc); - Value *IdxEndRoundDown = BinaryOperator::CreateAdd(CountRoundDown, StartIdx, - "end.idx.rnd.down", Loc); + Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf"); + Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec"); + Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx, + "end.idx.rnd.down"); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. - Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, - IdxEndRoundDown, - StartIdx, - "cmp.zero", Loc); - - // If we are using memory runtime checks, include them in. - if (MemoryRuntimeCheck) - Cmp = BinaryOperator::Create(Instruction::Or, Cmp, MemoryRuntimeCheck, - "CntOrMem", Loc); + Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, + "cmp.zero"); + + BasicBlock *LastBypassBlock = BypassBlock; + + // Generate the code that checks in runtime if arrays overlap. We put the + // checks into a separate block to make the more common case of few elements + // faster. + Instruction *MemRuntimeCheck = addRuntimeCheck(Legal, + BypassBlock->getTerminator()); + if (MemRuntimeCheck) { + // Create a new block containing the memory check. + BasicBlock *CheckBlock = BypassBlock->splitBasicBlock(MemRuntimeCheck, + "vector.memcheck"); + LoopBypassBlocks.push_back(CheckBlock); + + // Replace the branch into the memory check block with a conditional branch + // for the "few elements case". + Instruction *OldTerm = BypassBlock->getTerminator(); + BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm); + OldTerm->eraseFromParent(); + + Cmp = MemRuntimeCheck; + LastBypassBlock = CheckBlock; + } - BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc); - // Remove the old terminator. - Loc->eraseFromParent(); + LastBypassBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(MiddleBlock, VectorPH, Cmp, + LastBypassBlock); // We are going to resume the execution of the scalar loop. 
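All of the index computations above derive from one identity: with Step = VF * UF, the vector body executes Count - (Count mod Step) iterations and the scalar loop handles the remainder; when the rounded count is zero, the vector loop is bypassed entirely. In plain arithmetic (a sketch with hypothetical names):

    #include <cassert>
    #include <cstdint>

    struct BypassCounts {
      std::uint64_t CountRoundDown; // iterations run by the vector body
      bool SkipVectorLoop;          // branch straight to the scalar loop?
    };

    // Step is VF * UF: one vector iteration retires that many scalar ones.
    BypassCounts computeBypass(std::uint64_t Count, std::uint64_t VF,
                               std::uint64_t UF) {
      std::uint64_t Step = VF * UF;
      assert(Step != 0 && "VF and UF must be nonzero");
      std::uint64_t R = Count % Step;               // "n.mod.vf"
      std::uint64_t CountRoundDown = Count - R;     // "n.vec"
      return {CountRoundDown, CountRoundDown == 0}; // "cmp.zero"
    }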
// Go over all of the induction variables that we found and fix the @@ -1108,30 +1296,45 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { Value *CRD = CountRoundDown; if (CRDSize > IISize) CRD = CastInst::Create(Instruction::Trunc, CountRoundDown, - II.StartValue->getType(), - "tr.crd", BypassBlock->getTerminator()); + II.StartValue->getType(), "tr.crd", + LoopBypassBlocks.back()->getTerminator()); else if (CRDSize < IISize) CRD = CastInst::Create(Instruction::SExt, CountRoundDown, II.StartValue->getType(), - "sext.crd", BypassBlock->getTerminator()); + "sext.crd", + LoopBypassBlocks.back()->getTerminator()); // Handle reverse integer induction counter: - EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", - BypassBlock->getTerminator()); + EndValue = + BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end", + LoopBypassBlocks.back()->getTerminator()); break; } case LoopVectorizationLegality::IK_PtrInduction: { // For pointer induction variables, calculate the offset using // the end index. - EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown, - "ptr.ind.end", - BypassBlock->getTerminator()); + EndValue = + GetElementPtrInst::Create(II.StartValue, CountRoundDown, "ptr.ind.end", + LoopBypassBlocks.back()->getTerminator()); + break; + } + case LoopVectorizationLegality::IK_ReversePtrInduction: { + // The value at the end of the loop for the reverse pointer is calculated + // by creating a GEP with a negative index starting from the start value. + Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0); + Value *NegIdx = BinaryOperator::CreateSub(Zero, CountRoundDown, + "rev.ind.end", + LoopBypassBlocks.back()->getTerminator()); + EndValue = GetElementPtrInst::Create(II.StartValue, NegIdx, + "rev.ptr.ind.end", + LoopBypassBlocks.back()->getTerminator()); break; } }// end of case // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - ResumeVal->addIncoming(II.StartValue, BypassBlock); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). @@ -1147,7 +1350,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { assert(!ResumeIndex && "Unexpected resume value found"); ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", MiddleBlock->getTerminator()); - ResumeIndex->addIncoming(StartIdx, BypassBlock); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); } @@ -1187,6 +1391,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Insert the new loop into the loop nest and register the new basic blocks. 
if (ParentLoop) { ParentLoop->addChildLoop(Lp); + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) + ParentLoop->addBasicBlockToLoop(LoopBypassBlocks[I], LI->getBase()); ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase()); ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase()); @@ -1203,7 +1409,6 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { LoopExitBlock = ExitBlock; LoopVectorBody = VecBody; LoopScalarBody = OldBasicBlock; - LoopBypassBlock = BypassBlock; } /// This function returns the identity element (or neutral element) for @@ -1295,9 +1500,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // the cost-model. // //===------------------------------------------------===// - BasicBlock &BB = *OrigLoop->getHeader(); - Constant *Zero = - ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0); + Constant *Zero = Builder.getInt32(0); // In order to support reduction variables we need to be able to vectorize // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two @@ -1343,7 +1546,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // To do so, we need to generate the 'identity' vector and overide // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlock->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -1391,7 +1594,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr); PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); Value *StartVal = (part == 0) ? VectorStart : Identity; - NewPhi->addIncoming(StartVal, LoopBypassBlock); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody); RdxParts.push_back(NewPhi); } @@ -1533,8 +1737,6 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { void InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, PhiVector *PV) { - Constant *Zero = Builder.getInt32(0); - // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { VectorParts &Entry = WidenMap.get(it); @@ -1568,7 +1770,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // optimizations will clean it up. VectorParts Cond = createEdgeMask(P->getIncomingBlock(0), P->getParent()); - + for (unsigned part = 0; part < UF; ++part) { VectorParts &In0 = getVectorValue(P->getIncomingValue(0)); VectorParts &In1 = getVectorValue(P->getIncomingValue(1)); @@ -1600,6 +1802,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case LoopVectorizationLegality::IK_ReverseIntInduction: case LoopVectorizationLegality::IK_PtrInduction: + case LoopVectorizationLegality::IK_ReversePtrInduction: // Handle reverse integer and pointer inductions. Value *StartIdx = 0; // If we have a single integer induction variable then use it. @@ -1635,15 +1838,23 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, // Handle the pointer induction variable case. 
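The reduction setup earlier in this hunk builds its start value from the identity vector with the incoming scalar spliced into a single lane. A sketch assuming an integer add reduction (identity 0), illustrative only:

#include <array>
#include <cstdio>

int main() {
  const unsigned VF = 4;
  int IncomingScalar = 7;               // value reaching the reduction PHI
  std::array<int, VF> VectorStart{};    // identity element (0) in every lane
  VectorStart[0] = IncomingScalar;      // override a single lane
  int Sum = 0;
  for (unsigned i = 0; i < VF; ++i) Sum += VectorStart[i];
  printf("start lanes fold back to %d\n", Sum);   // prints 7
  return 0;
}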
assert(P->getType()->isPointerTy() && "Unexpected type."); + // Is this a reverse induction ptr or a consecutive induction ptr. + bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction == + II.IK); + // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { - Constant *Idx = ConstantInt::get(Induction->getType(), - i + part * VF); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, - "gep.idx"); + int EltIndex = (i + part * VF) * (Reverse ? -1 : 1); + Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex); + Value *GlobalIdx; + if (!Reverse) + GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx"); + else + GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx"); + Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx, "next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, @@ -1684,13 +1895,13 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, for (unsigned Part = 0; Part < UF; ++Part) { Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); - // Update the NSW, NUW and Exact flags. - BinaryOperator *VecOp = cast<BinaryOperator>(V); - if (isa<OverflowingBinaryOperator>(BinOp)) { + // Update the NSW, NUW and Exact flags. Notice: V can be an Undef. + BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V); + if (VecOp && isa<OverflowingBinaryOperator>(BinOp)) { VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap()); VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap()); } - if (isa<PossiblyExactOperator>(VecOp)) + if (VecOp && isa<PossiblyExactOperator>(VecOp)) VecOp->setIsExact(BinOp->isExact()); Entry[Part] = V; @@ -1740,124 +1951,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, break; } - case Instruction::Store: { - // Attempt to issue a wide store. - StoreInst *SI = dyn_cast<StoreInst>(it); - Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF); - Value *Ptr = SI->getPointerOperand(); - unsigned Alignment = SI->getAlignment(); - - assert(!Legal->isUniform(Ptr) && - "We do not allow storing to uniform addresses"); - - - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (Stride == 0) { - scalarizeInstruction(it); + case Instruction::Store: + case Instruction::Load: + vectorizeMemoryInstruction(it, Legal); break; - } - - // Handle consecutive stores. - - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - if (Gep) { - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - - Value *LastGepOperand = Gep->getOperand(NumOperands - 1); - VectorParts &GEPParts = getVectorValue(LastGepOperand); - Value *LastIndex = GEPParts[0]; - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. 
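A worked example of the per-lane indices produced above for a pointer induction, with VF = 4, UF = 1 and an assumed NormalizedIdx of 8 (NormalizedIdx is the vector loop counter minus the start index):

#include <cstdio>

int main() {
  const int VF = 4, UF = 1;
  long NormalizedIdx = 8;               // some iteration into the vector loop
  for (int Reverse = 0; Reverse <= 1; ++Reverse) {
    for (int part = 0; part < UF; ++part)
      for (int i = 0; i < VF; ++i) {
        long Idx = (i + part * VF) * (Reverse ? -1 : 1);
        long GlobalIdx = Reverse ? Idx - NormalizedIdx : NormalizedIdx + Idx;
        printf("%ld ", GlobalIdx);      // index fed to the scalar GEP
      }
    printf("\n");   // forward: 8 9 10 11; reverse: -8 -9 -10 -11
  }
  return 0;
}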
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); - VectorParts &PtrVal = getVectorValue(Ptr); - Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); - } - - VectorParts &StoredVal = getVectorValue(SI->getValueOperand()); - for (unsigned Part = 0; Part < UF; ++Part) { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); - - if (Reverse) { - // If we store to reverse consecutive memory locations then we need - // to reverse the order of elements in the stored value. - StoredVal[Part] = reverseVector(StoredVal[Part]); - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); - } - - Value *VecPtr = Builder.CreateBitCast(PartPtr, StTy->getPointerTo()); - Builder.CreateStore(StoredVal[Part], VecPtr)->setAlignment(Alignment); - } - break; - } - case Instruction::Load: { - // Attempt to issue a wide load. - LoadInst *LI = dyn_cast<LoadInst>(it); - Type *RetTy = VectorType::get(LI->getType(), VF); - Value *Ptr = LI->getPointerOperand(); - unsigned Alignment = LI->getAlignment(); - - // If the pointer is loop invariant or if it is non consecutive, - // scalarize the load. - int Stride = Legal->isConsecutivePtr(Ptr); - bool Reverse = Stride < 0; - if (Legal->isUniform(Ptr) || Stride == 0) { - scalarizeInstruction(it); - break; - } - - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - if (Gep) { - // The last index does not have to be the induction. It can be - // consecutive and be a function of the index. For example A[I+1]; - unsigned NumOperands = Gep->getNumOperands(); - - Value *LastGepOperand = Gep->getOperand(NumOperands - 1); - VectorParts &GEPParts = getVectorValue(LastGepOperand); - Value *LastIndex = GEPParts[0]; - LastIndex = Builder.CreateExtractElement(LastIndex, Zero); - - // Create the new GEP with the new induction variable. - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setOperand(NumOperands - 1, LastIndex); - Ptr = Builder.Insert(Gep2); - } else { - // Use the induction element ptr. - assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); - VectorParts &PtrVal = getVectorValue(Ptr); - Ptr = Builder.CreateExtractElement(PtrVal[0], Zero); - } - - for (unsigned Part = 0; Part < UF; ++Part) { - // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); - - if (Reverse) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); - } - - Value *VecPtr = Builder.CreateBitCast(PartPtr, RetTy->getPointerTo()); - Value *LI = Builder.CreateLoad(VecPtr, "wide.load"); - cast<LoadInst>(LI)->setAlignment(Alignment); - Entry[Part] = Reverse ? reverseVector(LI) : LI; - } - break; - } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -1924,12 +2021,14 @@ void InnerLoopVectorizer::updateAnalysis() { SE->forgetLoop(OrigLoop); // Update the dominator tree information. 
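The reverse-access address math in the code removed above lives on in the new vectorizeMemoryInstruction helper, so a sketch is still useful: for unroll part Part, a reversed wide access of VF elements starts at Ptr - Part*VF - (VF - 1), which is what the two chained GEPs compute. Offsets here are assumed.

#include <cstdio>

int main() {
  const int VF = 4;
  int Data[16];
  int *Ptr = Data + 12;                 // address of the current scalar lane
  for (int Part = 0; Part < 2; ++Part) {
    int *PartPtr = Ptr + (-Part * VF) + (1 - VF); // GEP(GEP(Ptr,-Part*VF),1-VF)
    printf("part %d starts at offset %td\n", Part, PartPtr - Data);
  }
  // part 0 starts at offset 9, part 1 at offset 5: the wide accesses walk
  // backwards through memory in VF-element chunks.
  return 0;
}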
- assert(DT->properlyDominates(LoopBypassBlock, LoopExitBlock) && + assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); - DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlock); + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) + DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]); + DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back()); DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader); - DT->addNewBlock(LoopMiddleBlock, LoopBypassBlock); + DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front()); DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); @@ -2196,7 +2295,51 @@ void LoopVectorizationLegality::collectLoopUniforms() { } } +AliasAnalysis::Location +LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) { + if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) + return AA->getLocation(Store); + else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) + return AA->getLocation(Load); + + llvm_unreachable("Should be either load or store instruction"); +} + +bool +LoopVectorizationLegality::hasPossibleGlobalWriteReorder( + Value *Object, + Instruction *Inst, + AliasMultiMap& WriteObjects, + unsigned MaxByteWidth) { + + AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst); + + std::vector<Instruction*>::iterator + it = WriteObjects[Object].begin(), + end = WriteObjects[Object].end(); + + for (; it != end; ++it) { + Instruction* I = *it; + if (I == Inst) + continue; + + AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I); + if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth), + ThatLoc.getWithNewSize(MaxByteWidth))) + return true; + } + return false; +} + bool LoopVectorizationLegality::canVectorizeMemory() { + + if (TheLoop->isAnnotatedParallel()) { + DEBUG(dbgs() + << "LV: A loop annotated parallel, ignore memory dependency " + << "checks.\n"); + return true; + } + typedef SmallVector<Value*, 16> ValueVector; typedef SmallPtrSet<Value*, 16> ValueSet; // Holds the Load and Store *instructions*. @@ -2250,9 +2393,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } - // Holds the read and read-write *pointers* that we find. - ValueVector Reads; - ValueVector ReadWrites; + // Holds the read and read-write *pointers* that we find. These maps hold + // unique values for pointers (so no need for multi-map). + AliasMap Reads; + AliasMap ReadWrites; // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -2274,7 +2418,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // If we did *not* see this pointer before, insert it to // the read-write list. At this phase it is only a 'write' list. if (Seen.insert(Ptr)) - ReadWrites.push_back(Ptr); + ReadWrites.insert(std::make_pair(Ptr, ST)); } for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) { @@ -2289,7 +2433,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr)) - Reads.push_back(Ptr); + Reads.insert(std::make_pair(Ptr, LD)); } // If we write (or read-write) to a single destination and there are no @@ -2302,22 +2446,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // Find pointers with computable bounds. 
We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) - if (hasComputableBounds(*I)) { - PtrRtCheck.insert(SE, TheLoop, *I); - DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + AliasMap::iterator MI, ME; + for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { + Value *V = (*MI).first; + if (hasComputableBounds(V)) { + PtrRtCheck.insert(SE, TheLoop, V); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; break; } - for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) - if (hasComputableBounds(*I)) { - PtrRtCheck.insert(SE, TheLoop, *I); - DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n"); + } + for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { + Value *V = (*MI).first; + if (hasComputableBounds(V)) { + PtrRtCheck.insert(SE, TheLoop, V); + DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n"); } else { CanDoRT = false; break; } + } // Check that we did not collect too many pointers or found a // unsizeable pointer. @@ -2332,47 +2481,104 @@ bool LoopVectorizationLegality::canVectorizeMemory() { bool NeedRTCheck = false; + // Biggest vectorized access possible, vector width * unroll factor. + // TODO: We're being very pessimistic here, find a way to know the + // real access width before getting here. + unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) * + TTI->getMaximumUnrollFactor(); // Now that the pointers are in two lists (Reads and ReadWrites), we // can check that there are no conflicts between each of the writes and // between the writes to the reads. - ValueSet WriteObjects; + // Note that WriteObjects duplicates the stores (indexed now by underlying + // objects) to avoid pointing to elements inside ReadWrites. + // TODO: Maybe create a new type where they can interact without duplication. + AliasMultiMap WriteObjects; ValueVector TempObjects; // Check that the read-writes do not conflict with other read-write // pointers. bool AllWritesIdentified = true; - for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) { - GetUnderlyingObjects(*I, TempObjects, DL); - for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); - it != e; ++it) { - if (!isIdentifiedObject(*it)) { - DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n"); + for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) { + Value *Val = (*MI).first; + Instruction *Inst = (*MI).second; + + GetUnderlyingObjects(Val, TempObjects, DL); + for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end(); + UI != UE; ++UI) { + if (!isIdentifiedObject(*UI)) { + DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n"); NeedRTCheck = true; AllWritesIdentified = false; } - if (!WriteObjects.insert(*it)) { + + // Never seen it before, can't alias. + if (WriteObjects[*UI].empty()) { + DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n"); + WriteObjects[*UI].push_back(Inst); + continue; + } + // Direct alias found. + if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) { + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" + << **UI <<"\n"); + return false; + } + DEBUG(dbgs() << "LV: Found a conflicting global value:" + << **UI <<"\n"); + DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n"); + DEBUG(dbgs() << "LV: On value:" << *Val <<"\n"); + + // If global alias, make sure they do alias. 
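A conservative stand-in for the widened alias query behind hasPossibleGlobalWriteReorder: the real code asks AliasAnalysis with Location::getWithNewSize(MaxByteWidth); the sketch below models that as interval overlap, with an assumed register width and unroll factor.

#include <cstdint>
#include <cstdio>

// Two accesses widened to MaxByteWidth bytes may conflict if their
// [Ptr, Ptr + MaxByteWidth) intervals overlap.
static bool MayOverlap(uint64_t A, uint64_t B, uint64_t MaxByteWidth) {
  return A < B + MaxByteWidth && B < A + MaxByteWidth;
}

int main() {
  // Biggest vectorized access possible: register bytes * unroll factor,
  // e.g. a 256-bit register and a maximum unroll of 2.
  uint64_t MaxByteWidth = (256 / 8) * 2;
  printf("%d %d\n", (int)MayOverlap(0, 100, MaxByteWidth),   // 0: disjoint
                    (int)MayOverlap(0, 40, MaxByteWidth));   // 1: may reorder
  return 0;
}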
+ if (hasPossibleGlobalWriteReorder(*UI, + Inst, + WriteObjects, + MaxByteWidth)) { DEBUG(dbgs() << "LV: Found a possible write-write reorder:" - << **it <<"\n"); + << *UI <<"\n"); return false; } + + // Didn't alias, insert into map for further reference. + WriteObjects[*UI].push_back(Inst); } TempObjects.clear(); } /// Check that the reads don't conflict with the read-writes. - for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) { - GetUnderlyingObjects(*I, TempObjects, DL); - for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end(); - it != e; ++it) { + for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) { + Value *Val = (*MI).first; + GetUnderlyingObjects(Val, TempObjects, DL); + for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end(); + UI != UE; ++UI) { // If all of the writes are identified then we don't care if the read // pointer is identified or not. - if (!AllWritesIdentified && !isIdentifiedObject(*it)) { - DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n"); + if (!AllWritesIdentified && !isIdentifiedObject(*UI)) { + DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n"); NeedRTCheck = true; } - if (WriteObjects.count(*it)) { - DEBUG(dbgs() << "LV: Found a possible read/write reorder:" - << **it <<"\n"); + + // Never seen it before, can't alias. + if (WriteObjects[*UI].empty()) + continue; + // Direct alias found. + if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) { + DEBUG(dbgs() << "LV: Found a possible write-write reorder:" + << **UI <<"\n"); + return false; + } + DEBUG(dbgs() << "LV: Found a global value: " + << **UI <<"\n"); + Instruction *Inst = (*MI).second; + DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n"); + DEBUG(dbgs() << "LV: On value:" << *Val <<"\n"); + + // If global alias, make sure they do alias. + if (hasPossibleGlobalWriteReorder(*UI, + Inst, + WriteObjects, + MaxByteWidth)) { + DEBUG(dbgs() << "LV: Found a possible read-write reorder:" + << *UI <<"\n"); return false; } } @@ -2535,7 +2741,7 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) return IK_NoInduction; - // Check that the PHI is consecutive and starts at zero. + // Check that the PHI is consecutive. const SCEV *PhiScev = SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { @@ -2562,6 +2768,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) { uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType()); if (C->getValue()->equalsInt(Size)) return IK_PtrInduction; + else if (C->getValue()->equalsInt(0 - Size)) + return IK_ReversePtrInduction; return IK_NoInduction; } @@ -2612,18 +2820,34 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) { return AR->isAffine(); } -unsigned +LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, unsigned UserVF) { + // Width 1 means no vectorize + VectorizationFactor Factor = { 1U, 0U }; if (OptForSize && Legal->getRuntimePointerCheck()->Need) { DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); - return 1; + return Factor; } // Find the trip count. 
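The new IK_ReversePtrInduction kind introduced earlier in this hunk is recognized purely from the SCEV step constant: a step of +sizeof(elem) walks forward, a step of -sizeof(elem) backward. A toy classifier, illustrative only:

#include <cstdint>
#include <cstdio>

enum InductionKind { IK_NoInduction, IK_PtrInduction, IK_ReversePtrInduction };

static InductionKind classify(int64_t StepBytes, uint64_t Size) {
  if (StepBytes == (int64_t)Size) return IK_PtrInduction;
  if (StepBytes == -(int64_t)Size) return IK_ReversePtrInduction;
  return IK_NoInduction;
}

int main() {
  printf("%d %d %d\n", classify(4, 4), classify(-4, 4), classify(8, 4));
  // prints: 1 2 0
  return 0;
}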
unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch()); DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n"); + unsigned WidestType = getWidestType(); + unsigned WidestRegister = TTI.getRegisterBitWidth(true); + unsigned MaxVectorSize = WidestRegister / WidestType; + DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n"); + DEBUG(dbgs() << "LV: The Widest register is:" << WidestRegister << "bits.\n"); + + if (MaxVectorSize == 0) { + DEBUG(dbgs() << "LV: The target has no vector registers.\n"); + MaxVectorSize = 1; + } + + assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" + " into one vector!"); + unsigned VF = MaxVectorSize; // If we optimize the program for size, avoid creating the tail loop. @@ -2631,7 +2855,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // If we are unable to calculate the trip count then don't try to vectorize. if (TC < 2) { DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); - return 1; + return Factor; } // Find the maximum SIMD width that can fit within the trip count. @@ -2644,7 +2868,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, // zero then we require a tail. if (VF < 2) { DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); - return 1; + return Factor; } } @@ -2652,7 +2876,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n"); - return UserVF; + Factor.Width = UserVF; + return Factor; } float Cost = expectedCost(1); @@ -2672,12 +2897,70 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize, } DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n"); - return Width; + Factor.Width = Width; + Factor.Cost = Width * Cost; + return Factor; +} + +unsigned LoopVectorizationCostModel::getWidestType() { + unsigned MaxWidth = 8; + + // For each block. + for (Loop::block_iterator bb = TheLoop->block_begin(), + be = TheLoop->block_end(); bb != be; ++bb) { + BasicBlock *BB = *bb; + + // For each instruction in the loop. + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + Type *T = it->getType(); + + // Only examine Loads, Stores and PHINodes. + if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it)) + continue; + + // Examine PHI nodes that are reduction variables. + if (PHINode *PN = dyn_cast<PHINode>(it)) + if (!Legal->getReductionVars()->count(PN)) + continue; + + // Examine the stored values. + if (StoreInst *ST = dyn_cast<StoreInst>(it)) + T = ST->getValueOperand()->getType(); + + // Ignore loaded pointer types and stored pointer types that are not + // consecutive. However, we do want to take consecutive stores/loads of + // pointer vectors into account. + if (T->isPointerTy() && !isConsecutiveLoadOrStore(it)) + continue; + + MaxWidth = std::max(MaxWidth, + (unsigned)DL->getTypeSizeInBits(T->getScalarType())); + } + } + + return MaxWidth; } unsigned LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, - unsigned UserUF) { + unsigned UserUF, + unsigned VF, + unsigned LoopCost) { + + // -- The unroll heuristics -- + // We unroll the loop in order to expose ILP and reduce the loop overhead. + // There are many micro-architectural considerations that we can't predict + // at this level. For example frontend pressure (on decode or fetch) due to + // code size, or the number and capabilities of the execution ports. 
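A back-of-the-envelope version of the width selection just computed: the candidate VF is the widest register divided by the widest scalar type among the loop's loads, stores, and reduction PHIs. The numbers below are assumptions.

#include <cstdio>

int main() {
  unsigned WidestRegister = 256;   // e.g. a 256-bit vector unit
  unsigned WidestType = 32;        // widest load/store/reduction type, in bits
  unsigned MaxVectorSize = WidestRegister / WidestType;
  if (MaxVectorSize == 0) MaxVectorSize = 1;    // scalar-only target
  printf("candidate VF = %u\n", MaxVectorSize); // 8 lanes
  return 0;
}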
+ // + // We use the following heuristics to select the unroll factor: + // 1. If the code has reductions then we unroll in order to break the cross + // iteration dependency. + // 2. If the loop is really small then we unroll in order to reduce the loop + // overhead. + // 3. We don't unroll if we think that we will spill registers to memory due + // to the increased register pressure. + // Use the user preference, unless 'auto' is selected. if (UserUF != 0) return UserUF; @@ -2710,17 +2993,39 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, // fit without causing spills. unsigned UF = (TargetVectorRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers; - // We don't want to unroll the loops to the point where they do not fit into - // the decoded cache. Assume that we only allow 32 IR instructions. - UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions)); - // Clamp the unroll factor ranges to reasonable factors. + unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor(); + + // If we did not calculate the cost for VF (because the user selected the VF) + // then we calculate the cost of VF here. + if (LoopCost == 0) + LoopCost = expectedCost(VF); + + // Clamp the calculated UF to be between 1 and the max unroll factor + // that the target allows. if (UF > MaxUnrollSize) UF = MaxUnrollSize; else if (UF < 1) UF = 1; - return UF; + if (Legal->getReductionVars()->size()) { + DEBUG(dbgs() << "LV: Unrolling because of reductions. \n"); + return UF; + } + + // We want to unroll tiny loops in order to reduce the loop overhead. + // We assume that the cost overhead is 1 and we use the cost model + // to estimate the cost of the loop and unroll until the cost of the + // loop overhead is about 5% of the cost of the loop. + DEBUG(dbgs() << "LV: Loop cost is "<< LoopCost <<" \n"); + if (LoopCost < 20) { + DEBUG(dbgs() << "LV: Unrolling to reduce branch cost. \n"); + unsigned NewUF = 20/LoopCost + 1; + return std::min(NewUF, UF); + } + + DEBUG(dbgs() << "LV: Not Unrolling. \n"); + return 1; } LoopVectorizationCostModel::RegisterUsage @@ -2878,9 +3183,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { case Instruction::GetElementPtr: - // We mark this instruction as zero-cost because scalar GEPs are usually - // lowered to the intruction addressing mode. At the moment we don't - // generate vector geps. + // We mark this instruction as zero-cost because the cost of GEPs in + // vectorized code depends on whether the corresponding memory instruction + // is scalarized or not. Therefore, we handle GEPs with the memory + // instruction cost. return 0; case Instruction::Br: { return TTI.getCFInstrCost(I->getOpcode()); @@ -2923,83 +3229,59 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } - case Instruction::Store: { - StoreInst *SI = cast<StoreInst>(I); - Type *ValTy = SI->getValueOperand()->getType(); + case Instruction::Store: + case Instruction::Load: { + StoreInst *SI = dyn_cast<StoreInst>(I); + LoadInst *LI = dyn_cast<LoadInst>(I); + Type *ValTy = (SI ? SI->getValueOperand()->getType() : + LI->getType()); VectorTy = ToVectorTy(ValTy, VF); + unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment(); + unsigned AS = SI ? SI->getPointerAddressSpace() : + LI->getPointerAddressSpace(); + Value *Ptr = SI ?
SI->getPointerOperand() : LI->getPointerOperand(); + // We add the cost of address computation here instead of with the gep + // instruction because only here we know whether the operation is + // scalarized. if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); + return TTI.getAddressComputationCost(VectorTy) + + TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); - // Scalarized stores. - int Stride = Legal->isConsecutivePtr(SI->getPointerOperand()); + // Scalarized loads/stores. + int Stride = Legal->isConsecutivePtr(Ptr); bool Reverse = Stride < 0; if (0 == Stride) { unsigned Cost = 0; - // The cost of extracting from the value vector and pointer vector. - Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF); + Type *PtrTy = ToVectorTy(Ptr->getType(), VF); for (unsigned i = 0; i < VF; ++i) { - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, - i); + // The cost of extracting the pointer operand. Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i); + // In case of STORE, the cost of ExtractElement from the vector. + // In case of LOAD, the cost of InsertElement into the returned + // vector. + Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement : + Instruction::InsertElement, + VectorTy, i); } - // The cost of the scalar stores. + // The cost of the scalar loads/stores. + Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType()); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), - SI->getAlignment(), - SI->getPointerAddressSpace()); + Alignment, AS); return Cost; } - // Wide stores. - unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - SI->getAlignment(), - SI->getPointerAddressSpace()); + // Wide load/stores. + unsigned Cost = TTI.getAddressComputationCost(VectorTy); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS); + if (Reverse) Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); return Cost; } - case Instruction::Load: { - LoadInst *LI = cast<LoadInst>(I); - - if (VF == 1) - return TTI.getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), - LI->getPointerAddressSpace()); - - // Scalarized loads. - int Stride = Legal->isConsecutivePtr(LI->getPointerOperand()); - bool Reverse = Stride < 0; - if (0 == Stride) { - unsigned Cost = 0; - Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF); - - // The cost of extracting from the pointer vector. - for (unsigned i = 0; i < VF; ++i) - Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i); - - // The cost of inserting data to the result vector. - for (unsigned i = 0; i < VF; ++i) - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy, i); - - // The cost of the scalar stores. - Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), RetTy->getScalarType(), - LI->getAlignment(), - LI->getPointerAddressSpace()); - return Cost; - } - - // Wide loads. - unsigned Cost = TTI.getMemoryOpCost(I->getOpcode(), VectorTy, - LI->getAlignment(), - LI->getPointerAddressSpace()); - if (Reverse) - Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0); - return Cost; - } case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -3077,4 +3359,14 @@ namespace llvm { } } +bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { + // Check for a store. 
+ if (StoreInst *ST = dyn_cast<StoreInst>(Inst)) + return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0; + + // Check for a load. + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0; + return false; +}
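Finally, a worked example of the unroll decision in the rewritten selectUnrollFactor: loops with reductions keep the register-pressure-clamped UF, tiny loops are unrolled until the assumed loop overhead of 1 is about 5% of the loop cost (NewUF = 20/LoopCost + 1), and everything else is left alone. The inputs below are assumed.

#include <algorithm>
#include <cstdio>
#include <initializer_list>

int main() {
  unsigned UF = 4;                    // register-pressure/target clamp
  bool HasReductions = false;
  for (unsigned LoopCost : {3u, 12u, 25u}) {
    unsigned Result;
    if (HasReductions)
      Result = UF;                    // break the cross-iteration dependency
    else if (LoopCost < 20)
      Result = std::min(20 / LoopCost + 1, UF);  // reduce branch overhead
    else
      Result = 1;                     // not worth unrolling
    printf("cost %u -> UF %u\n", LoopCost, Result); // 3->4, 12->2, 25->1
  }
  return 0;
}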