From ebe69fe11e48d322045d5949c83283927a0d790b Mon Sep 17 00:00:00 2001
From: Stephen Hines
Date: Mon, 23 Mar 2015 12:10:34 -0700
Subject: Update aosp/master LLVM for rebase to r230699.

Change-Id: I2b5be30509658cb8266be782de0ab24f9099f9b9
---
 lib/Transforms/IPO/Android.mk | 1 +
 lib/Transforms/IPO/ArgumentPromotion.cpp | 6 +-
 lib/Transforms/IPO/CMakeLists.txt | 5 +
 lib/Transforms/IPO/DeadArgumentElimination.cpp | 161 +-
 lib/Transforms/IPO/FunctionAttrs.cpp | 8 +-
 lib/Transforms/IPO/GlobalDCE.cpp | 3 +
 lib/Transforms/IPO/GlobalOpt.cpp | 8 +-
 lib/Transforms/IPO/IPO.cpp | 3 +-
 lib/Transforms/IPO/InlineAlways.cpp | 4 +-
 lib/Transforms/IPO/InlineSimple.cpp | 4 +-
 lib/Transforms/IPO/Inliner.cpp | 53 +-
 lib/Transforms/IPO/LLVMBuild.txt | 2 +-
 lib/Transforms/IPO/LoopExtractor.cpp | 2 +-
 lib/Transforms/IPO/LowerBitSets.cpp | 612 +++++++
 lib/Transforms/IPO/PartialInlining.cpp | 10 +-
 lib/Transforms/IPO/PassManagerBuilder.cpp | 53 +-
 lib/Transforms/IPO/PruneEH.cpp | 8 +-
 lib/Transforms/IPO/StripSymbols.cpp | 4 +-
 lib/Transforms/InstCombine/CMakeLists.txt | 4 +
 lib/Transforms/InstCombine/InstCombine.h | 432 -----
 lib/Transforms/InstCombine/InstCombineAddSub.cpp | 126 +-
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 216 ++-
 lib/Transforms/InstCombine/InstCombineCalls.cpp | 310 ++--
 lib/Transforms/InstCombine/InstCombineCasts.cpp | 85 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp | 309 +++-
 lib/Transforms/InstCombine/InstCombineInternal.h | 529 ++++++
 .../InstCombine/InstCombineLoadStoreAlloca.cpp | 469 +++--
 .../InstCombine/InstCombineMulDivRem.cpp | 174 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp | 4 +-
 lib/Transforms/InstCombine/InstCombineSelect.cpp | 261 ++-
 lib/Transforms/InstCombine/InstCombineShifts.cpp | 16 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp | 2 +-
 .../InstCombine/InstCombineVectorOps.cpp | 123 +-
 lib/Transforms/InstCombine/InstCombineWorklist.h | 107 --
 .../InstCombine/InstructionCombining.cpp | 572 +++---
 lib/Transforms/InstCombine/LLVMBuild.txt | 2 +-
 .../Instrumentation/AddressSanitizer.cpp | 587 ++++--
 lib/Transforms/Instrumentation/Android.mk | 2 +-
 lib/Transforms/Instrumentation/BoundsChecking.cpp | 6 +-
 lib/Transforms/Instrumentation/CMakeLists.txt | 5 +-
 .../Instrumentation/DataFlowSanitizer.cpp | 63 +-
 lib/Transforms/Instrumentation/DebugIR.cpp | 617 -------
 lib/Transforms/Instrumentation/DebugIR.h | 98 -
 lib/Transforms/Instrumentation/GCOVProfiling.cpp | 43 +-
 lib/Transforms/Instrumentation/InstrProfiling.cpp | 351 ++++
 lib/Transforms/Instrumentation/Instrumentation.cpp | 1 +
 lib/Transforms/Instrumentation/LLVMBuild.txt | 2 +-
 lib/Transforms/Instrumentation/MemorySanitizer.cpp | 526 ++++--
 .../Instrumentation/SanitizerCoverage.cpp | 175 +-
 lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 40 +-
 lib/Transforms/ObjCARC/ARCInstKind.cpp | 645 +++++++
 lib/Transforms/ObjCARC/ARCInstKind.h | 123 ++
 lib/Transforms/ObjCARC/Android.mk | 2 +-
 lib/Transforms/ObjCARC/CMakeLists.txt | 5 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 81 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.h | 21 +-
 lib/Transforms/ObjCARC/ObjCARC.h | 211 +--
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 8 +-
 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 26 +-
 lib/Transforms/ObjCARC/ObjCARCContract.cpp | 494 +++--
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 14 +-
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 240 +--
 lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 254 ---
 lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 4 +-
 lib/Transforms/Scalar/ADCE.cpp | 70 +-
 lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 16 +-
 lib/Transforms/Scalar/Android.mk | 6 +
 lib/Transforms/Scalar/BDCE.cpp | 411 +++++
 lib/Transforms/Scalar/CMakeLists.txt | 10 +
 lib/Transforms/Scalar/ConstantHoisting.cpp | 9 +-
 lib/Transforms/Scalar/ConstantProp.cpp | 9 +-
 lib/Transforms/Scalar/DCE.cpp | 8 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +-
 lib/Transforms/Scalar/EarlyCSE.cpp | 635 ++++---
 lib/Transforms/Scalar/GVN.cpp | 516 +++---
 lib/Transforms/Scalar/IndVarSimplify.cpp | 14 +-
 .../Scalar/InductiveRangeCheckElimination.cpp | 1422 +++++++++++++++
 lib/Transforms/Scalar/JumpThreading.cpp | 30 +-
 lib/Transforms/Scalar/LICM.cpp | 401 +++--
 lib/Transforms/Scalar/LLVMBuild.txt | 2 +-
 lib/Transforms/Scalar/LoopDeletion.cpp | 8 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 98 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp | 29 +-
 lib/Transforms/Scalar/LoopRerollPass.cpp | 1285 ++++++++-----
 lib/Transforms/Scalar/LoopRotation.cpp | 98 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp | 36 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp | 495 ++++-
 lib/Transforms/Scalar/LoopUnswitch.cpp | 70 +-
 lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 192 ++
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 32 +-
 lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 83 +-
 lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 14 +-
 lib/Transforms/Scalar/PlaceSafepoints.cpp | 989 ++++++++++
 lib/Transforms/Scalar/Reassociate.cpp | 15 +-
 lib/Transforms/Scalar/Reg2Mem.cpp | 2 +-
 lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 1897 ++++++++++++++++++++
 lib/Transforms/Scalar/SCCP.cpp | 14 +-
 lib/Transforms/Scalar/SROA.cpp | 1501 ++++++++++++----
 lib/Transforms/Scalar/SampleProfile.cpp | 6 +-
 lib/Transforms/Scalar/Scalar.cpp | 15 +-
 lib/Transforms/Scalar/ScalarReplAggregates.cpp | 31 +-
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 14 +-
 lib/Transforms/Scalar/SimplifyCFGPass.cpp | 118 +-
 lib/Transforms/Scalar/Sink.cpp | 8 +-
 .../Scalar/StraightLineStrengthReduce.cpp | 274 +++
 lib/Transforms/Scalar/StructurizeCFG.cpp | 75 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp | 6 +-
 lib/Transforms/Utils/ASanStackFrameLayout.cpp | 8 +-
 lib/Transforms/Utils/AddDiscriminators.cpp | 2 +-
 lib/Transforms/Utils/Android.mk | 1 -
 lib/Transforms/Utils/BasicBlockUtils.cpp | 211 ++-
 lib/Transforms/Utils/BreakCriticalEdges.cpp | 63 +-
 lib/Transforms/Utils/BuildLibCalls.cpp | 134 +-
 lib/Transforms/Utils/CMakeLists.txt | 5 +-
 lib/Transforms/Utils/CloneFunction.cpp | 158 +-
 lib/Transforms/Utils/CloneModule.cpp | 4 +-
 lib/Transforms/Utils/DemoteRegToStack.cpp | 34 +-
 lib/Transforms/Utils/InlineFunction.cpp | 136 +-
 lib/Transforms/Utils/LCSSA.cpp | 55 +-
 lib/Transforms/Utils/LLVMBuild.txt | 2 +-
 lib/Transforms/Utils/Local.cpp | 85 +-
 lib/Transforms/Utils/LoopSimplify.cpp | 91 +-
 lib/Transforms/Utils/LoopUnroll.cpp | 40 +-
 lib/Transforms/Utils/LoopUnrollRuntime.cpp | 104 +-
 lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 188 --
 lib/Transforms/Utils/LowerSwitch.cpp | 272 ++-
 lib/Transforms/Utils/Mem2Reg.cpp | 11 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 19 +-
 lib/Transforms/Utils/SSAUpdater.cpp | 41 +-
 lib/Transforms/Utils/SimplifyCFG.cpp | 600 ++++---
 lib/Transforms/Utils/SimplifyIndVar.cpp | 129 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp | 20 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp | 677 +++----
 lib/Transforms/Utils/SymbolRewriter.cpp | 33 +-
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 1 -
 lib/Transforms/Utils/ValueMapper.cpp | 255 ++-
 lib/Transforms/Vectorize/BBVectorize.cpp | 22 +-
 lib/Transforms/Vectorize/CMakeLists.txt | 3 +
 lib/Transforms/Vectorize/LLVMBuild.txt | 2 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp | 1882 +++++--------------
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 552 ++++--
 lib/Transforms/Vectorize/Vectorize.cpp | 2 +-
 142 files changed, 17554 insertions(+), 8586 deletions(-)
 create mode 100644 lib/Transforms/IPO/LowerBitSets.cpp
 delete mode 100644 lib/Transforms/InstCombine/InstCombine.h
 create mode 100644 lib/Transforms/InstCombine/InstCombineInternal.h
 delete mode 100644 lib/Transforms/InstCombine/InstCombineWorklist.h
 delete mode 100644 lib/Transforms/Instrumentation/DebugIR.cpp
 delete mode 100644 lib/Transforms/Instrumentation/DebugIR.h
 create mode 100644 lib/Transforms/Instrumentation/InstrProfiling.cpp
 create mode 100644 lib/Transforms/ObjCARC/ARCInstKind.cpp
 create mode 100644 lib/Transforms/ObjCARC/ARCInstKind.h
 delete mode 100644 lib/Transforms/ObjCARC/ObjCARCUtil.cpp
 create mode 100644 lib/Transforms/Scalar/BDCE.cpp
 create mode 100644 lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
 create mode 100644 lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
 create mode 100644 lib/Transforms/Scalar/PlaceSafepoints.cpp
 create mode 100644 lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
 create mode 100644 lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
 delete mode 100644 lib/Transforms/Utils/LowerExpectIntrinsic.cpp

diff --git a/lib/Transforms/IPO/Android.mk b/lib/Transforms/IPO/Android.mk
index 1fe7d63..f08b0ad 100644
--- a/lib/Transforms/IPO/Android.mk
+++ b/lib/Transforms/IPO/Android.mk
@@ -16,6 +16,7 @@ transforms_ipo_SRC_FILES := \
   Inliner.cpp \
   Internalize.cpp \
   LoopExtractor.cpp \
+  LowerBitSets.cpp \
   MergeFunctions.cpp \
   PartialInlining.cpp \
   PassManagerBuilder.cpp \
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index c4706e8..7e48ce3 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -554,14 +554,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
     BasicBlock *BB = Load->getParent();

     AliasAnalysis::Location Loc = AA.getLocation(Load);
-    if (AA.canInstructionRangeModify(BB->front(), *Load, Loc))
+    if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc,
+                                     AliasAnalysis::Mod))
       return false;  // Pointer is invalidated!

     // Now check every path from the entry block to the load for transparency.
     // To do this, we perform a depth first search on the inverse CFG from the
     // loading block.
-    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-      BasicBlock *P = *PI;
+    for (BasicBlock *P : predecessors(BB)) {
       for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks))
         if (AA.canBasicBlockModify(*TranspBB, Loc))
           return false;
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 90c1c33..3df17b9 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -14,12 +14,17 @@ add_llvm_library(LLVMipo
   Inliner.cpp
   Internalize.cpp
   LoopExtractor.cpp
+  LowerBitSets.cpp
   MergeFunctions.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
   PruneEH.cpp
   StripDeadPrototypes.cpp
   StripSymbols.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/IPO
   )

 add_dependencies(LLVMipo intrinsics_gen)
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4045c09..4431311 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -146,7 +146,7 @@ namespace {
   private:
     Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
     Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses,
-                       unsigned RetValNum = 0);
+                       unsigned RetValNum = -1U);
     Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);

     void SurveyFunction(const Function &F);
@@ -387,14 +387,32 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn)
 /// for void functions and 1 for functions not returning a struct. It returns
 /// the number of struct elements for functions returning a struct.
 static unsigned NumRetVals(const Function *F) {
-  if (F->getReturnType()->isVoidTy())
+  Type *RetTy = F->getReturnType();
+  if (RetTy->isVoidTy())
     return 0;
-  else if (StructType *STy = dyn_cast<StructType>(F->getReturnType()))
+  else if (StructType *STy = dyn_cast<StructType>(RetTy))
     return STy->getNumElements();
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getNumElements();
   else
     return 1;
 }

+/// Returns the sub-type a function will return at a given Idx. Should
+/// correspond to the result type of an ExtractValue instruction executed with
+/// just that one Idx (i.e. only top-level structure is considered).
+static Type *getRetComponentType(const Function *F, unsigned Idx) {
+  Type *RetTy = F->getReturnType();
+  assert(!RetTy->isVoidTy() && "void type has no subtype");
+
+  if (StructType *STy = dyn_cast<StructType>(RetTy))
+    return STy->getElementType(Idx);
+  else if (ArrayType *ATy = dyn_cast<ArrayType>(RetTy))
+    return ATy->getElementType();
+  else
+    return RetTy;
+}
+
 /// MarkIfNotLive - This checks Use for liveness in LiveValues. If Use is not
 /// live, it adds Use to the MaybeLiveUses argument. Returns the determined
 /// liveness of Use.
@@ -425,9 +443,24 @@ DAE::Liveness DAE::SurveyUse(const Use *U,
       // function's return value is live. We use RetValNum here, for the case
       // that U is really a use of an insertvalue instruction that uses the
      // original Use.
-      RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
-      // We might be live, depending on the liveness of Use.
-      return MarkIfNotLive(Use, MaybeLiveUses);
+      const Function *F = RI->getParent()->getParent();
+      if (RetValNum != -1U) {
+        RetOrArg Use = CreateRet(F, RetValNum);
+        // We might be live, depending on the liveness of Use.
+        return MarkIfNotLive(Use, MaybeLiveUses);
+      } else {
+        DAE::Liveness Result = MaybeLive;
+        for (unsigned i = 0; i < NumRetVals(F); ++i) {
+          RetOrArg Use = CreateRet(F, i);
+          // We might be live, depending on the liveness of Use. If any
+          // sub-value is live, then the entire value is considered live. This
+          // is a conservative choice, and better tracking is possible.
+          DAE::Liveness SubResult = MarkIfNotLive(Use, MaybeLiveUses);
+          if (Result != Live)
+            Result = SubResult;
+        }
+        return Result;
+      }
     }
     if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
       if (U->getOperandNo() != InsertValueInst::getAggregateOperandIndex()
@@ -541,7 +574,6 @@ void DAE::SurveyFunction(const Function &F) {
   // Keep track of the number of live retvals, so we can skip checks once all
   // of them turn out to be live.
   unsigned NumLiveRetVals = 0;
-  Type *STy = dyn_cast<StructType>(F.getReturnType());
   // Loop all uses of the function.
   for (const Use &U : F.uses()) {
     // If the function is PASSED IN as an argument, its address has been
@@ -563,34 +595,35 @@ void DAE::SurveyFunction(const Function &F) {

     // Now, check how our return value(s) is/are used in this caller. Don't
     // bother checking return values if all of them are live already.
-    if (NumLiveRetVals != RetCount) {
-      if (STy) {
-        // Check all uses of the return value.
-        for (const User *U : TheCall->users()) {
-          const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U);
-          if (Ext && Ext->hasIndices()) {
-            // This use uses a part of our return value, survey the uses of
-            // that part and store the results for this index only.
-            unsigned Idx = *Ext->idx_begin();
-            if (RetValLiveness[Idx] != Live) {
-              RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
-              if (RetValLiveness[Idx] == Live)
-                NumLiveRetVals++;
-            }
-          } else {
-            // Used by something else than extractvalue. Mark all return
-            // values as live.
-            for (unsigned i = 0; i != RetCount; ++i )
-              RetValLiveness[i] = Live;
-            NumLiveRetVals = RetCount;
-            break;
-          }
+    if (NumLiveRetVals == RetCount)
+      continue;
+
+    // Check all uses of the return value.
+    for (const Use &U : TheCall->uses()) {
+      if (ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(U.getUser())) {
+        // This use uses a part of our return value, survey the uses of
+        // that part and store the results for this index only.
+        unsigned Idx = *Ext->idx_begin();
+        if (RetValLiveness[Idx] != Live) {
+          RetValLiveness[Idx] = SurveyUses(Ext, MaybeLiveRetUses[Idx]);
+          if (RetValLiveness[Idx] == Live)
+            NumLiveRetVals++;
         }
       } else {
-        // Single return value
-        RetValLiveness[0] = SurveyUses(TheCall, MaybeLiveRetUses[0]);
-        if (RetValLiveness[0] == Live)
+        // Used by something else than extractvalue. Survey, but assume that the
+        // result applies to all sub-values.
+        UseVector MaybeLiveAggregateUses;
+        if (SurveyUse(&U, MaybeLiveAggregateUses) == Live) {
           NumLiveRetVals = RetCount;
+          RetValLiveness.assign(RetCount, Live);
+          break;
+        } else {
+          for (unsigned i = 0; i != RetCount; ++i) {
+            if (RetValLiveness[i] != Live)
+              MaybeLiveRetUses[i].append(MaybeLiveAggregateUses.begin(),
+                                         MaybeLiveAggregateUses.end());
+          }
+        }
       }
     }
   }
@@ -775,39 +808,29 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   if (RetTy->isVoidTy() || HasLiveReturnedArg) {
     NRetTy = RetTy;
   } else {
-    StructType *STy = dyn_cast<StructType>(RetTy);
-    if (STy)
-      // Look at each of the original return values individually.
-      for (unsigned i = 0; i != RetCount; ++i) {
-        RetOrArg Ret = CreateRet(F, i);
-        if (LiveValues.erase(Ret)) {
-          RetTypes.push_back(STy->getElementType(i));
-          NewRetIdxs[i] = RetTypes.size() - 1;
-        } else {
-          ++NumRetValsEliminated;
-          DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
-                << F->getName() << "\n");
-        }
-      }
-    else
-      // We used to return a single value.
-      if (LiveValues.erase(CreateRet(F, 0))) {
-        RetTypes.push_back(RetTy);
-        NewRetIdxs[0] = 0;
+    // Look at each of the original return values individually.
+    for (unsigned i = 0; i != RetCount; ++i) {
+      RetOrArg Ret = CreateRet(F, i);
+      if (LiveValues.erase(Ret)) {
+        RetTypes.push_back(getRetComponentType(F, i));
+        NewRetIdxs[i] = RetTypes.size() - 1;
       } else {
-        DEBUG(dbgs() << "DAE - Removing return value from " << F->getName()
-              << "\n");
         ++NumRetValsEliminated;
+        DEBUG(dbgs() << "DAE - Removing return value " << i << " from "
+              << F->getName() << "\n");
+      }
+    }
+    if (RetTypes.size() > 1) {
+      // More than one return type? Reduce it down to size.
+      if (StructType *STy = dyn_cast<StructType>(RetTy)) {
+        // Make the new struct packed if we used to return a packed struct
+        // already.
+        NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
+      } else {
+        assert(isa<ArrayType>(RetTy) && "unexpected multi-value return");
+        NRetTy = ArrayType::get(RetTypes[0], RetTypes.size());
       }
-    if (RetTypes.size() > 1)
-      // More than one return type? Return a struct with them. Also, if we used
-      // to return a struct and didn't change the number of return values,
-      // return a struct again. This prevents changing {something} into
-      // something and {} into void.
-      // Make the new struct packed if we used to return a packed struct
-      // already.
-      NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());
-    else if (RetTypes.size() == 1)
+    } else if (RetTypes.size() == 1)
       // One return type? Just a simple value then, but only if we didn't use to
       // return a struct with that simple value before.
       NRetTy = RetTypes.front();
@@ -959,9 +982,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (!Call->getType()->isX86_MMXTy())
         Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
     } else {
-      assert(RetTy->isStructTy() &&
+      assert((RetTy->isStructTy() || RetTy->isArrayTy()) &&
             "Return type changed, but not into a void. The old return type"
-            " must have been a struct!");
+            " must have been a struct or an array!");
       Instruction *InsertPt = Call;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
         BasicBlock::iterator IP = II->getNormalDest()->begin();
@@ -969,9 +992,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
         InsertPt = IP;
       }

-      // We used to return a struct. Instead of doing smart stuff with all the
-      // uses of this struct, we will just rebuild it using
-      // extract/insertvalue chaining and let instcombine clean that up.
+      // We used to return a struct or array. Instead of doing smart stuff
+      // with all the uses, we will just rebuild it using extract/insertvalue
+      // chaining and let instcombine clean that up.
       //
       // Start out building up our return value from undef
       Value *RetVal = UndefValue::get(RetTy);
@@ -1034,8 +1057,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     if (NFTy->getReturnType()->isVoidTy()) {
       RetVal = nullptr;
     } else {
-      assert (RetTy->isStructTy());
-      // The original return value was a struct, insert
+      assert(RetTy->isStructTy() || RetTy->isArrayTy());
+      // The original return value was a struct or array, insert
       // extractvalue/insertvalue chains to extract only the values we need
       // to return and insert them into our new result.
       // This does generate messy code, but we'll let it to instcombine to
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 823ae53..8925e4c 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -31,7 +31,7 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;

 #define DEBUG_TYPE "functionattrs"
@@ -124,7 +124,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       AU.addRequired<AliasAnalysis>();
-      AU.addRequired<TargetLibraryInfo>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
@@ -139,7 +139,7 @@ INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
                 "Deduce function attributes", false, false)
@@ -1702,7 +1702,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
 bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
   AA = &getAnalysis<AliasAnalysis>();
-  TLI = &getAnalysis<TargetLibraryInfo>();
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

   bool Changed = annotateLibraryCalls(SCC);
   Changed |= AddReadAttrs(SCC);
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 705e929..0c844fe 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -219,6 +219,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
     if (F->hasPrefixData())
       MarkUsedGlobalsAsNeeded(F->getPrefixData());

+    if (F->hasPrologueData())
+      MarkUsedGlobalsAsNeeded(F->getPrologueData());
+
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
         for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 6e0ae83..45e04f1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -38,7 +38,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -68,7 +68,7 @@ STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
 namespace {
   struct GlobalOpt : public ModulePass {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<TargetLibraryInfo>();
+      AU.addRequired<TargetLibraryInfoWrapperPass>();
     }
     static char ID; // Pass identification, replacement for typeid
     GlobalOpt() : ModulePass(ID) {
@@ -95,7 +95,7 @@ namespace {
 char GlobalOpt::ID = 0;
 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                 "Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                 "Global Variable Optimizer", false, false)
@@ -3042,7 +3042,7 @@ bool GlobalOpt::runOnModule(Module &M) {
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   DL = DLP ? &DLP->getDataLayout() : nullptr;
-  TLI = &getAnalysis<TargetLibraryInfo>();
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

   bool LocalChange = true;
   while (LocalChange) {
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index b4d31d8..fcacec3 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -16,7 +16,7 @@
 #include "llvm-c/Initialization.h"
 #include "llvm-c/Transforms/IPO.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Transforms/IPO.h"
 using namespace llvm;
@@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeLoopExtractorPass(Registry);
   initializeBlockExtractorPassPass(Registry);
   initializeSingleLoopExtractorPass(Registry);
+  initializeLowerBitSetsPass(Registry);
   initializeMergeFunctionsPass(Registry);
   initializePartialInlinerPass(Registry);
   initializePruneEHPass(Registry);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 819b2e0..dc56a02 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -15,7 +15,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/IR/CallSite.h"
@@ -68,7 +68,7 @@ char AlwaysInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
                 "Inliner for always_inline functions", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index d9a2b9e..9b01d81 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -13,7 +13,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/IR/CallSite.h"
@@ -76,7 +76,7 @@ char SimpleInliner::ID = 0;
 INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
                 "Function Integration/Inlining", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis)
 INITIALIZE_PASS_END(SimpleInliner, "inline",
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 3abe7a8..305ad7a 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -17,7 +17,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionTracker.h"
"llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" @@ -29,7 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -77,7 +77,7 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -97,25 +97,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), AttributeSet::FunctionIndex, B); - AttributeSet CallerAttr = Caller->getAttributes(), - CalleeAttr = Callee->getAttributes(); - if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) { + if (Callee->hasFnAttribute(Attribute::StackProtectReq)) { Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); Caller->addFnAttr(Attribute::StackProtectReq); - } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong) && - !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) { + } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) && + !Caller->hasFnAttribute(Attribute::StackProtectReq)) { Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); Caller->addFnAttr(Attribute::StackProtectStrong); - } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtect) && - !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq) && - !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + } else if (Callee->hasFnAttribute(Attribute::StackProtect) && + !Caller->hasFnAttribute(Attribute::StackProtectReq) && + !Caller->hasFnAttribute(Attribute::StackProtectStrong)) Caller->addFnAttr(Attribute::StackProtect); } @@ -273,8 +265,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && - Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize); + Caller->hasFnAttribute(Attribute::OptimizeForSize); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; @@ -283,17 +274,14 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::InlineHint); - if (InlineHint && HintThreshold > thres - && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::MinSize)) + Callee->hasFnAttribute(Attribute::InlineHint); + if (InlineHint && HintThreshold > thres && + !Caller->hasFnAttribute(Attribute::MinSize)) thres = HintThreshold; // Listen to the cold attribute when it would decrease the threshold. 
   bool ColdCallee = Callee && !Callee->isDeclaration() &&
-    Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                         Attribute::Cold);
+                    Callee->hasFnAttribute(Attribute::Cold);
   // Command line argument for InlineLimit will override the default
   // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
   // do not use the default cold threshold even if it is smaller.
@@ -443,10 +431,11 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-  AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+  AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
-  const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+  const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
   AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();

   SmallPtrSet<Function*, 8> SCCFunctions;
@@ -506,8 +495,8 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {

   InlinedArrayAllocasTy InlinedArrayAllocas;
-  InlineFunctionInfo InlineInfo(&CG, DL, AA, AT);
-
+  InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+
   // Now that we have all of the call sites, loop over them and inline them if
   // it looks profitable to do so.
   bool Changed = false;
@@ -658,9 +647,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
     // Handle the case when this function is called and we only want to care
     // about always-inline functions. This is a bit of a hack to share code
     // between here and the InlineAlways pass.
-    if (AlwaysInlineOnly &&
-        !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                         Attribute::AlwaysInline))
+    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
       continue;

     // If the only remaining users of the function are dead constants, remove
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 77e0b22..575dce4 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -20,4 +20,4 @@ type = Library
 name = IPO
 parent = Transforms
 library_name = ipo
-required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils Vectorize
+required_libraries = Analysis Core IPA InstCombine Scalar Support TransformUtils Vectorize
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 20414aa..41334ca 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -242,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
     if (!Split) continue;

     SmallVector<BasicBlock*, 2> NewBBs;
-    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
+    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
   }
 }
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
new file mode 100644
index 0000000..0a22a80
--- /dev/null
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -0,0 +1,612 @@
+//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
+// See http://llvm.org/docs/LangRef.html#bitsets for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/LowerBitSets.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lowerbitsets"
+
+STATISTIC(NumBitSetsCreated, "Number of bitsets created");
+STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
+STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+
+bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
+  if (Offset < ByteOffset)
+    return false;
+
+  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
+    return false;
+
+  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
+  if (BitOffset >= BitSize)
+    return false;
+
+  return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;
+}
+
+bool BitSetInfo::containsValue(
+    const DataLayout *DL,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
+    uint64_t COffset) const {
+  if (auto GV = dyn_cast<GlobalVariable>(V)) {
+    auto I = GlobalLayout.find(GV);
+    if (I == GlobalLayout.end())
+      return false;
+    return containsGlobalOffset(I->second + COffset);
+  }
+
+  if (auto GEP = dyn_cast<GEPOperator>(V)) {
+    APInt APOffset(DL->getPointerSizeInBits(0), 0);
+    bool Result = GEP->accumulateConstantOffset(*DL, APOffset);
+    if (!Result)
+      return false;
+    COffset += APOffset.getZExtValue();
+    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
+                         COffset);
+  }
+
+  if (auto Op = dyn_cast<Operator>(V)) {
+    if (Op->getOpcode() == Instruction::BitCast)
+      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);
+
+    if (Op->getOpcode() == Instruction::Select)
+      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
+             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
+  }
+
+  return false;
+}
+
+BitSetInfo BitSetBuilder::build() {
+  if (Min > Max)
+    Min = 0;
+
+  // Normalize each offset against the minimum observed offset, and compute
+  // the bitwise OR of each of the offsets. The number of trailing zeros
+  // in the mask gives us the log2 of the alignment of all offsets, which
+  // allows us to compress the bitset by only storing one bit per aligned
+  // address.
+  uint64_t Mask = 0;
+  for (uint64_t &Offset : Offsets) {
+    Offset -= Min;
+    Mask |= Offset;
+  }
+
+  BitSetInfo BSI;
+  BSI.ByteOffset = Min;
+
+  BSI.AlignLog2 = 0;
+  // FIXME: Can probably do something smarter if all offsets are 0.
+  if (Mask != 0)
+    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+
+  // Build the compressed bitset while normalizing the offsets against the
+  // computed alignment.
+  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
+  uint64_t ByteSize = (BSI.BitSize + 7) / 8;
+  BSI.Bits.resize(ByteSize);
+  for (uint64_t Offset : Offsets) {
+    Offset >>= BSI.AlignLog2;
+    BSI.Bits[Offset / 8] |= 1 << (Offset % 8);
+  }
+
+  return BSI;
+}
+
+void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
+  // Create a new fragment to hold the layout for F.
+  Fragments.emplace_back();
+  std::vector<uint64_t> &Fragment = Fragments.back();
+  uint64_t FragmentIndex = Fragments.size() - 1;
+
+  for (auto ObjIndex : F) {
+    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
+    if (OldFragmentIndex == 0) {
+      // We haven't seen this object index before, so just add it to the current
+      // fragment.
+      Fragment.push_back(ObjIndex);
+    } else {
+      // This index belongs to an existing fragment. Copy the elements of the
+      // old fragment into this one and clear the old fragment. We don't update
+      // the fragment map just yet, this ensures that any further references to
+      // indices from the old fragment in this fragment do not insert any more
+      // indices.
+      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
+      Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
+      OldFragment.clear();
+    }
+  }
+
+  // Update the fragment map to point our object indices to this fragment.
+  for (uint64_t ObjIndex : Fragment)
+    FragmentMap[ObjIndex] = FragmentIndex;
+}
+
+namespace {
+
+struct LowerBitSets : public ModulePass {
+  static char ID;
+  LowerBitSets() : ModulePass(ID) {
+    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
+  }
+
+  const DataLayout *DL;
+  IntegerType *Int1Ty;
+  IntegerType *Int8Ty;
+  IntegerType *Int32Ty;
+  Type *Int32PtrTy;
+  IntegerType *Int64Ty;
+  Type *IntPtrTy;
+
+  // The llvm.bitsets named metadata.
+  NamedMDNode *BitSetNM;
+
+  // Mapping from bitset mdstrings to the call sites that test them.
+  DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+
+  BitSetInfo
+  buildBitSet(MDString *BitSet,
+              const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+  Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
+                          GlobalVariable *BitSetGlobal, Value *BitOffset);
+  Value *
+  lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
+                  GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
+                  const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
+  void buildBitSetsFromGlobals(Module &M,
+                               const std::vector<MDString *> &BitSets,
+                               const std::vector<GlobalVariable *> &Globals);
+  bool buildBitSets(Module &M);
+  bool eraseBitSetMetadata(Module &M);
+
+  bool doInitialization(Module &M) override;
+  bool runOnModule(Module &M) override;
+};
+
+} // namespace
+
+INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
+                "Lower bitset metadata", false, false)
+char LowerBitSets::ID = 0;
+
+ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
+
+bool LowerBitSets::doInitialization(Module &M) {
+  DL = M.getDataLayout();
+  if (!DL)
+    report_fatal_error("Data layout required");
+
+  Int1Ty = Type::getInt1Ty(M.getContext());
+  Int8Ty = Type::getInt8Ty(M.getContext());
+  Int32Ty = Type::getInt32Ty(M.getContext());
+  Int32PtrTy = PointerType::getUnqual(Int32Ty);
+  Int64Ty = Type::getInt64Ty(M.getContext());
+  IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+
+  BitSetNM = M.getNamedMetadata("llvm.bitsets");
+
+  BitSetTestCallSites.clear();
+
+  return false;
+}
+
+/// Build a bit set for BitSet using the object layouts in
+/// GlobalLayout.
+BitSetInfo LowerBitSets::buildBitSet(
+    MDString *BitSet,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+  BitSetBuilder BSB;
+
+  // Compute the byte offset of each element of this bitset.
+  if (BitSetNM) {
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+      auto OpGlobal = cast<GlobalVariable>(
+          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+      uint64_t Offset =
+          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
+                                ->getValue())->getZExtValue();
+
+      Offset += GlobalLayout.find(OpGlobal)->second;
+
+      BSB.addOffset(Offset);
+    }
+  }
+
+  return BSB.build();
+}
+
+/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
+/// Bits. This pattern matches to the bt instruction on x86.
+static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
+                                  Value *BitOffset) {
+  auto BitsType = cast<IntegerType>(Bits->getType());
+  unsigned BitWidth = BitsType->getBitWidth();
+
+  BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
+  Value *BitIndex =
+      B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
+  Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
+  Value *MaskedBits = B.CreateAnd(Bits, BitMask);
+  return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
+}
+
+/// Build a test that bit BitOffset is set in BSI, where
+/// BitSetGlobal is a global containing the bits in BSI.
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
+                                      GlobalVariable *BitSetGlobal,
+                                      Value *BitOffset) {
+  if (BSI.Bits.size() <= 8) {
+    // If the bit set is sufficiently small, we can avoid a load by bit testing
+    // a constant.
+    IntegerType *BitsTy;
+    if (BSI.Bits.size() <= 4)
+      BitsTy = Int32Ty;
+    else
+      BitsTy = Int64Ty;
+
+    uint64_t Bits = 0;
+    for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) {
+      Bits <<= 8;
+      Bits |= *I;
+    }
+    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
+    return createMaskedBitTest(B, BitsConst, BitOffset);
+  } else {
+    // TODO: We might want to use the memory variant of the bt instruction
+    // with the previously computed bit offset at -Os. This instruction does
+    // exactly what we want but has been benchmarked as being slower than open
+    // coding the load+bt.
+    Value *BitSetGlobalOffset =
+        B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5));
+    Value *BitSetEntryAddr = B.CreateGEP(
+        ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset);
+    Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr);
+
+    return createMaskedBitTest(B, BitSetEntry, BitOffset);
+  }
+}
+
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
+    CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
+    GlobalVariable *CombinedGlobal,
+    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
+  Value *Ptr = CI->getArgOperand(0);
+
+  if (BSI.containsValue(DL, GlobalLayout, Ptr))
+    return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());
+
+  Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
+  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
+      GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
+
+  BasicBlock *InitialBB = CI->getParent();
+
+  IRBuilder<> B(CI);
+
+  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
+
+  if (BSI.isSingleOffset())
+    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
+
+  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
+
+  Value *BitOffset;
+  if (BSI.AlignLog2 == 0) {
+    BitOffset = PtrOffset;
+  } else {
+    // We need to check that the offset both falls within our range and is
+    // suitably aligned. We can check both properties at the same time by
+    // performing a right rotate by log2(alignment) followed by an integer
+    // comparison against the bitset size. The rotate will move the lower
+    // order bits that need to be zero into the higher order bits of the
+    // result, causing the comparison to fail if they are nonzero. The rotate
+    // also conveniently gives us a bit offset to use during the load from
+    // the bitset.
+    Value *OffsetSHR =
+        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
+    Value *OffsetSHL = B.CreateShl(
+        PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) -
+                                                  BSI.AlignLog2));
+    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
+  }
+
+  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
+  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+
+  // If the bit set is all ones, testing against it is unnecessary.
+  if (BSI.isAllOnes())
+    return OffsetInRange;
+
+  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
+  IRBuilder<> ThenB(Term);
+
+  // Now that we know that the offset is in range and aligned, load the
+  // appropriate bit from the bitset.
+  Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset);
+
+  // The value we want is 0 if we came directly from the initial block
+  // (having failed the range or alignment checks), or the loaded bit if
+  // we came from the block in which we loaded it.
+  B.SetInsertPoint(CI);
+  PHINode *P = B.CreatePHI(Int1Ty, 2);
+  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
+  P->addIncoming(Bit, ThenB.GetInsertBlock());
+  return P;
+}
+
+/// Given a disjoint set of bitsets and globals, layout the globals, build the
+/// bit sets and lower the llvm.bitset.test calls.
+void LowerBitSets::buildBitSetsFromGlobals(
+    Module &M,
+    const std::vector<MDString *> &BitSets,
+    const std::vector<GlobalVariable *> &Globals) {
+  // Build a new global with the combined contents of the referenced globals.
+  std::vector<Constant *> GlobalInits;
+  for (GlobalVariable *G : Globals) {
+    GlobalInits.push_back(G->getInitializer());
+    uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());
+
+    // Compute the amount of padding required to align the next element to the
+    // next power of 2.
+    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+    // Cap at 128 was found experimentally to have a good data/instruction
+    // overhead tradeoff.
+    if (Padding > 128)
+      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+    GlobalInits.push_back(
+        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+  }
+  if (!GlobalInits.empty())
+    GlobalInits.pop_back();
+  Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+  auto CombinedGlobal =
+      new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
+                         GlobalValue::PrivateLinkage, NewInit);
+
+  const StructLayout *CombinedGlobalLayout =
+      DL->getStructLayout(cast<StructType>(NewInit->getType()));
+
+  // Compute the offsets of the original globals within the new global.
+  DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
+  for (unsigned I = 0; I != Globals.size(); ++I)
+    // Multiply by 2 to account for padding elements.
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
+
+  // For each bitset in this disjoint set...
+  for (MDString *BS : BitSets) {
+    // Build the bitset.
+    BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
+
+    // Create a global in which to store it.
+    ++NumBitSetsCreated;
+    Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits);
+    auto BitSetGlobal = new GlobalVariable(
+        M, BitsConst->getType(), /*isConstant=*/true,
+        GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");
+
+    // Lower each call to llvm.bitset.test for this bitset.
+    for (CallInst *CI : BitSetTestCallSites[BS]) {
+      ++NumBitSetCallsLowered;
+      Value *Lowered =
+          lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+      CI->replaceAllUsesWith(Lowered);
+      CI->eraseFromParent();
+    }
+  }
+
+  // Build aliases pointing to offsets into the combined global for each
+  // global from which we built the combined global, and replace references
+  // to the original globals with references to the aliases.
+  for (unsigned I = 0; I != Globals.size(); ++I) {
+    // Multiply by 2 to account for padding elements.
+    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
+                                      ConstantInt::get(Int32Ty, I * 2)};
+    Constant *CombinedGlobalElemPtr =
+        ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
+    GlobalAlias *GAlias = GlobalAlias::create(
+        Globals[I]->getType()->getElementType(),
+        Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+        "", CombinedGlobalElemPtr, &M);
+    GAlias->takeName(Globals[I]);
+    Globals[I]->replaceAllUsesWith(GAlias);
+    Globals[I]->eraseFromParent();
+  }
+}
+
+/// Lower all bit sets in this module.
+bool LowerBitSets::buildBitSets(Module &M) {
+  Function *BitSetTestFunc =
+      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+  if (!BitSetTestFunc)
+    return false;
+
+  // Equivalence class set containing bitsets and the globals they reference.
+  // This is used to partition the set of bitsets in the module into disjoint
+  // sets.
+  typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
+      GlobalClassesTy;
+  GlobalClassesTy GlobalClasses;
+
+  for (const Use &U : BitSetTestFunc->uses()) {
+    auto CI = cast<CallInst>(U.getUser());
+
+    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+    if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
+      report_fatal_error(
+          "Second argument of llvm.bitset.test must be metadata string");
+    auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());
+
+    // Add the call site to the list of call sites for this bit set. We also use
+    // BitSetTestCallSites to keep track of whether we have seen this bit set
+    // before. If we have, we don't need to re-add the referenced globals to the
+    // equivalence class.
+    std::pair<DenseMap<MDString *, std::vector<CallInst *>>::iterator,
+              bool> Ins =
+        BitSetTestCallSites.insert(
+            std::make_pair(BitSet, std::vector<CallInst *>()));
+    Ins.first->second.push_back(CI);
+    if (!Ins.second)
+      continue;
+
+    // Add the bitset to the equivalence class.
+    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
+    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+    if (!BitSetNM)
+      continue;
+
+    // Verify the bitset metadata and add the referenced globals to the bitset's
+    // equivalence class.
+    for (MDNode *Op : BitSetNM->operands()) {
+      if (Op->getNumOperands() != 3)
+        report_fatal_error(
+            "All operands of llvm.bitsets metadata must have 3 elements");
+
+      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
+        continue;
+
+      auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
+      if (!OpConstMD)
+        report_fatal_error("Bit set element must be a constant");
+      auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
+      if (!OpGlobal)
+        report_fatal_error("Bit set element must refer to global");
+
+      auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
+      if (!OffsetConstMD)
+        report_fatal_error("Bit set element offset must be a constant");
+      auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
+      if (!OffsetInt)
+        report_fatal_error(
+            "Bit set element offset must be an integer constant");
+
+      CurSet = GlobalClasses.unionSets(
+          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
+    }
+  }
+
+  if (GlobalClasses.empty())
+    return false;
+
+  // For each disjoint set we found...
+  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
+                                 E = GlobalClasses.end();
+       I != E; ++I) {
+    if (!I->isLeader()) continue;
+
+    ++NumBitSetDisjointSets;
+
+    // Build the list of bitsets and referenced globals in this disjoint set.
+    std::vector<MDString *> BitSets;
+    std::vector<GlobalVariable *> Globals;
+    llvm::DenseMap<MDString *, uint64_t> BitSetIndices;
+    llvm::DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
+    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
+         MI != GlobalClasses.member_end(); ++MI) {
+      if ((*MI).is<MDString *>()) {
+        BitSetIndices[MI->get<MDString *>()] = BitSets.size();
+        BitSets.push_back(MI->get<MDString *>());
+      } else {
+        GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
+        Globals.push_back(MI->get<GlobalVariable *>());
+      }
+    }
+
+    // For each bitset, build a set of indices that refer to globals referenced
+    // by the bitset.
+    std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
+    if (BitSetNM) {
+      for (MDNode *Op : BitSetNM->operands()) {
+        // Op = { bitset name, global, offset }
+        if (!Op->getOperand(1))
+          continue;
+        auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
+        if (I == BitSetIndices.end())
+          continue;
+
+        auto OpGlobal = cast<GlobalVariable>(
+            cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
+        BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
+      }
+    }
+
+    // Order the sets of indices by size. The GlobalLayoutBuilder works best
+    // when given small index sets first.
+    std::stable_sort(
+        BitSetMembers.begin(), BitSetMembers.end(),
+        [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
+          return O1.size() < O2.size();
+        });
+
+    // Create a GlobalLayoutBuilder and provide it with index sets as layout
+    // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
+    // as close together as possible.
+    GlobalLayoutBuilder GLB(Globals.size());
+    for (auto &&MemSet : BitSetMembers)
+      GLB.addFragment(MemSet);
+
+    // Build a vector of globals with the computed layout.
+    std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
+    auto OGI = OrderedGlobals.begin();
+    for (auto &&F : GLB.Fragments)
+      for (auto &&Offset : F)
+        *OGI++ = Globals[Offset];
+
+    // Order bitsets by name for determinism.
+    std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
+      return S1->getString() < S2->getString();
+    });
+
+    // Build the bitsets from this disjoint set.
+    buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);
+  }
+
+  return true;
+}
+
+bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+  if (!BitSetNM)
+    return false;
+
+  M.eraseNamedMetadata(BitSetNM);
+  return true;
+}
+
+bool LowerBitSets::runOnModule(Module &M) {
+  bool Changed = buildBitSets(M);
+  Changed |= eraseBitSetMetadata(M);
+  return Changed;
+}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 76d6dfa..4a7cb7b 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -58,13 +58,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
   BasicBlock* returnBlock = nullptr;
   BasicBlock* nonReturnBlock = nullptr;
   unsigned returnCount = 0;
-  for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
-       SI != SE; ++SI)
-    if (isa<ReturnInst>((*SI)->getTerminator())) {
-      returnBlock = *SI;
+  for (BasicBlock *BB : successors(entryBlock)) {
+    if (isa<ReturnInst>(BB->getTerminator())) {
+      returnBlock = BB;
       returnCount++;
     } else
-      nonReturnBlock = *SI;
+      nonReturnBlock = BB;
+  }

   if (returnCount != 1)
     return nullptr;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index da85a91..9a75050 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,12 +19,11 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Vectorize.h"
@@ -118,7 +117,7 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
 }

 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
-                                           PassManagerBase &PM) const {
+                                           legacy::PassManagerBase &PM) const {
   for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
     if ((*GlobalExtensions)[i].first == ETy)
       (*GlobalExtensions)[i].second(*this, PM);
@@ -127,8 +126,8 @@ void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
       Extensions[i].second(*this, PM);
 }

-void
-PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
+void PassManagerBuilder::addInitialAliasAnalysisPasses(
+    legacy::PassManagerBase &PM) const {
   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
   // BasicAliasAnalysis wins if they disagree. This is intended to help
   // support "obvious" type-punning idioms.
@@ -139,11 +138,13 @@ PassManagerBuilder::addInitialAliasAnalysisPasses(PassManagerBase &PM) const {
   PM.add(createBasicAliasAnalysisPass());
 }

-void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM) {
+void PassManagerBuilder::populateFunctionPassManager(
+    legacy::FunctionPassManager &FPM) {
   addExtensionsToPM(EP_EarlyAsPossible, FPM);

   // Add LibraryInfo if we have some.
-  if (LibraryInfo) FPM.add(new TargetLibraryInfo(*LibraryInfo));
+  if (LibraryInfo)
+    FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));

   if (OptLevel == 0) return;
@@ -158,7 +159,8 @@ void PassManagerBuilder::populateFunctionPassManager(FunctionPassManager &FPM)
   FPM.add(createLowerExpectIntrinsicPass());
 }

-void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
+void PassManagerBuilder::populateModulePassManager(
+    legacy::PassManagerBase &MPM) {
   // If all optimizations are disabled, just run the always-inline pass and,
   // if enabled, the function merging pass.
   if (OptLevel == 0) {
@@ -182,7 +184,8 @@
   }

   // Add LibraryInfo if we have some.
-  if (LibraryInfo) MPM.add(new TargetLibraryInfo(*LibraryInfo));
+  if (LibraryInfo)
+    MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));

   addInitialAliasAnalysisPasses(MPM);
@@ -228,7 +231,8 @@
   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   MPM.add(createCFGSimplificationPass());   // Merge & remove BBs
   MPM.add(createReassociatePass());         // Reassociate expressions
-  MPM.add(createLoopRotatePass());          // Rotate Loop
+  // Rotate Loop - disable header duplication at -Oz
+  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
   MPM.add(createLICMPass());                // Hoist loop invariants
   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
   MPM.add(createInstructionCombiningPass());
@@ -248,6 +252,11 @@
   MPM.add(createMemCpyOptPass());           // Remove memcpy / form memset
   MPM.add(createSCCPPass());                // Constant prop with SCCP

+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  MPM.add(createBitTrackingDCEPass());      // Delete dead bit computations
+
   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
   MPM.add(createInstructionCombiningPass());
@@ -255,6 +264,7 @@
   MPM.add(createJumpThreadingPass());       // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
+  MPM.add(createLICMPass());

   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -373,7 +383,7 @@
   addExtensionsToPM(EP_OptimizerLast, MPM);
 }

-void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
+void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // Provide AliasAnalysis services for optimizations.
   addInitialAliasAnalysisPasses(PM);
@@ -464,6 +474,9 @@
   PM.add(createJumpThreadingPass());

+  // Lower bitset metadata to bitsets.
+  PM.add(createLowerBitSetsPass());
+
   // Delete basic blocks, which optimization passes may have killed.
  PM.add(createCFGSimplificationPass());
@@ -476,15 +489,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(PassManagerBase &PM) {
     PM.add(createMergeFunctionsPass());
 }
 
-void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
-                                                TargetMachine *TM) {
-  if (TM) {
-    PM.add(new DataLayoutPass());
-    TM->addAnalysisPasses(PM);
-  }
-
+void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
   if (LibraryInfo)
-    PM.add(new TargetLibraryInfo(*LibraryInfo));
+    PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
 
   if (VerifyInput)
     PM.add(createVerifierPass());
@@ -567,7 +574,7 @@ void
 LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
                                                   LLVMPassManagerRef PM) {
   PassManagerBuilder *Builder = unwrap(PMB);
-  FunctionPassManager *FPM = unwrap(PM);
+  legacy::FunctionPassManager *FPM = unwrap(PM);
   Builder->populateFunctionPassManager(*FPM);
 }
 
@@ -575,7 +582,7 @@ void
 LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
                                                 LLVMPassManagerRef PM) {
   PassManagerBuilder *Builder = unwrap(PMB);
-  PassManagerBase *MPM = unwrap(PM);
+  legacy::PassManagerBase *MPM = unwrap(PM);
   Builder->populateModulePassManager(*MPM);
 }
 
@@ -584,7 +591,7 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
                                                   LLVMBool Internalize,
                                                   LLVMBool RunInliner) {
   PassManagerBuilder *Builder = unwrap(PMB);
-  PassManagerBase *LPM = unwrap(PM);
+  legacy::PassManagerBase *LPM = unwrap(PM);
 
   // A small backwards compatibility hack. populateLTOPassManager used to take
   // an RunInliner option.
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index b2c4a09..1943b93 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -18,8 +18,10 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LibCallSemantics.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -175,7 +177,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
   bool MadeChange = false;
   for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
-      if (II->doesNotThrow()) {
+      if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(II)) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
         // Insert a call instruction before the invoke.
         CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
@@ -200,7 +202,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
         BB->getInstList().pop_back();
 
         // If the unwind block is now dead, nuke it.
-        if (pred_begin(UnwindBlock) == pred_end(UnwindBlock))
+        if (pred_empty(UnwindBlock))
           DeleteBasicBlock(UnwindBlock);  // Delete the new BB.
 
         ++NumRemoved;
@@ -234,7 +236,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
 /// updating the callgraph to reflect any now-obsolete edges due to calls that
 /// exist in the BB.
 void PruneEH::DeleteBasicBlock(BasicBlock *BB) {
-  assert(pred_begin(BB) == pred_end(BB) && "BB is not dead!");
+  assert(pred_empty(BB) && "BB is not dead!");
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
 
   CallGraphNode *CGN = CG[BB->getParent()];
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 3412b9e..816978e 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -301,8 +301,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
   // For each compile unit, find the live set of global variables/functions and
   // replace the current list of potentially dead global variables/functions
   // with the live list.
-  SmallVector<Value *, 64> LiveGlobalVariables;
-  SmallVector<Value *, 64> LiveSubprograms;
+  SmallVector<Metadata *, 64> LiveGlobalVariables;
+  SmallVector<Metadata *, 64> LiveSubprograms;
   DenseSet<const MDNode *> VisitedSet;
 
   for (DICompileUnit DIC : F.compile_units()) {
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a25696e..0ed8e62 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -12,6 +12,10 @@ add_llvm_library(LLVMInstCombine
   InstCombineShifts.cpp
   InstCombineSimplifyDemanded.cpp
   InstCombineVectorOps.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/InstCombine
   )
 
 add_dependencies(LLVMInstCombine intrinsics_gen)
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
deleted file mode 100644
index d4b252b..0000000
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ /dev/null
@@ -1,432 +0,0 @@
-//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
-
-#include "InstCombineWorklist.h"
-#include "llvm/Analysis/AssumptionTracker.h"
-#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-
-#define DEBUG_TYPE "instcombine"
-
-namespace llvm {
-class CallSite;
-class DataLayout;
-class DominatorTree;
-class TargetLibraryInfo;
-class DbgDeclareInst;
-class MemIntrinsic;
-class MemSetInst;
-
-/// SelectPatternFlavor - We can match a variety of different patterns for
-/// select operations.
-enum SelectPatternFlavor {
-  SPF_UNKNOWN = 0,
-  SPF_SMIN,
-  SPF_UMIN,
-  SPF_SMAX,
-  SPF_UMAX,
-  SPF_ABS,
-  SPF_NABS
-};
-
-/// getComplexity:  Assign a complexity or rank value to LLVM Values...
-///   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
-static inline unsigned getComplexity(Value *V) {
-  if (isa<Instruction>(V)) {
-    if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
-        BinaryOperator::isNot(V))
-      return 3;
-    return 4;
-  }
-  if (isa<Argument>(V))
-    return 3;
-  return isa<Constant>(V) ? (isa<UndefValue>(V) ?
0 : 1) : 2; -} - -/// AddOne - Add one to a Constant -static inline Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); -} -/// SubOne - Subtract one from a Constant -static inline Constant *SubOne(Constant *C) { - return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); -} - -/// InstCombineIRInserter - This is an IRBuilder insertion helper that works -/// just like the normal insertion helper, but also adds any new instructions -/// to the instcombine worklist. -class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter - : public IRBuilderDefaultInserter { - InstCombineWorklist &Worklist; - AssumptionTracker *AT; - -public: - InstCombineIRInserter(InstCombineWorklist &WL, AssumptionTracker *AT) - : Worklist(WL), AT(AT) {} - - void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, - BasicBlock::iterator InsertPt) const { - IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); - Worklist.Add(I); - - using namespace llvm::PatternMatch; - if (match(I, m_Intrinsic())) - AT->registerAssumption(cast(I)); - } -}; - -/// InstCombiner - The -instcombine pass. -class LLVM_LIBRARY_VISIBILITY InstCombiner - : public FunctionPass, - public InstVisitor { - AssumptionTracker *AT; - const DataLayout *DL; - TargetLibraryInfo *TLI; - DominatorTree *DT; // not required - bool MadeIRChange; - LibCallSimplifier *Simplifier; - bool MinimizeSize; - -public: - /// Worklist - All of the instructions that need to be simplified. - InstCombineWorklist Worklist; - - /// Builder - This is an IRBuilder that automatically inserts new - /// instructions into the worklist when they are created. - typedef IRBuilder BuilderTy; - BuilderTy *Builder; - - static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { - MinimizeSize = false; - initializeInstCombinerPass(*PassRegistry::getPassRegistry()); - } - -public: - bool runOnFunction(Function &F) override; - - bool DoOneIteration(Function &F, unsigned ItNum); - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - AssumptionTracker *getAssumptionTracker() const { return AT; } - - const DataLayout *getDataLayout() const { return DL; } - - DominatorTree *getDominatorTree() const { return DT; } - - TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } - - // Visitation implementation - Implement instruction combining for different - // instruction types. 
The semantics are as follows: - // Return Value: - // null - No change was made - // I - Change was made, I is still valid, I may be dead though - // otherwise - Change was made, replace I with returned instruction - // - Instruction *visitAdd(BinaryOperator &I); - Instruction *visitFAdd(BinaryOperator &I); - Value *OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty); - Instruction *visitSub(BinaryOperator &I); - Instruction *visitFSub(BinaryOperator &I); - Instruction *visitMul(BinaryOperator &I); - Value *foldFMulConst(Instruction *FMulOrDiv, Constant *C, - Instruction *InsertBefore); - Instruction *visitFMul(BinaryOperator &I); - Instruction *visitURem(BinaryOperator &I); - Instruction *visitSRem(BinaryOperator &I); - Instruction *visitFRem(BinaryOperator &I); - bool SimplifyDivRemOfSelect(BinaryOperator &I); - Instruction *commonRemTransforms(BinaryOperator &I); - Instruction *commonIRemTransforms(BinaryOperator &I); - Instruction *commonDivTransforms(BinaryOperator &I); - Instruction *commonIDivTransforms(BinaryOperator &I); - Instruction *visitUDiv(BinaryOperator &I); - Instruction *visitSDiv(BinaryOperator &I); - Instruction *visitFDiv(BinaryOperator &I); - Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); - Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Instruction *visitAnd(BinaryOperator &I); - Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI); - Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, - Value *B, Value *C); - Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, - Value *B, Value *C); - Instruction *visitOr(BinaryOperator &I); - Instruction *visitXor(BinaryOperator &I); - Instruction *visitShl(BinaryOperator &I); - Instruction *visitAShr(BinaryOperator &I); - Instruction *visitLShr(BinaryOperator &I); - Instruction *commonShiftTransforms(BinaryOperator &I); - Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, - Constant *RHSC); - Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, - GlobalVariable *GV, CmpInst &ICI, - ConstantInt *AndCst = nullptr); - Instruction *visitFCmpInst(FCmpInst &I); - Instruction *visitICmpInst(ICmpInst &I); - Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, - ConstantInt *RHS); - Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS); - Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI, - ConstantInt *DivRHS); - Instruction *FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, ConstantInt *CI2); - Instruction *FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, - ConstantInt *CI1, ConstantInt *CI2); - Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, - ICmpInst::Predicate Pred); - Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, - ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, - BinaryOperator &I); - Instruction *commonCastTransforms(CastInst &CI); - Instruction *commonPointerCastTransforms(CastInst &CI); - Instruction *visitTrunc(TruncInst &CI); - Instruction *visitZExt(ZExtInst &CI); - Instruction *visitSExt(SExtInst &CI); - Instruction *visitFPTrunc(FPTruncInst &CI); - Instruction *visitFPExt(CastInst &CI); - Instruction *visitFPToUI(FPToUIInst &FI); - Instruction *visitFPToSI(FPToSIInst &FI); - Instruction *visitUIToFP(CastInst &CI); - 
Instruction *visitSIToFP(CastInst &CI); - Instruction *visitPtrToInt(PtrToIntInst &CI); - Instruction *visitIntToPtr(IntToPtrInst &CI); - Instruction *visitBitCast(BitCastInst &CI); - Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *); - Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, - Value *A, Value *B, Instruction &Outer, - SelectPatternFlavor SPF2, Value *C); - Instruction *visitSelectInst(SelectInst &SI); - Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); - Instruction *visitCallInst(CallInst &CI); - Instruction *visitInvokeInst(InvokeInst &II); - - Instruction *SliceUpIllegalIntegerPHI(PHINode &PN); - Instruction *visitPHINode(PHINode &PN); - Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); - Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitAllocSite(Instruction &FI); - Instruction *visitFree(CallInst &FI); - Instruction *visitLoadInst(LoadInst &LI); - Instruction *visitStoreInst(StoreInst &SI); - Instruction *visitBranchInst(BranchInst &BI); - Instruction *visitSwitchInst(SwitchInst &SI); - Instruction *visitReturnInst(ReturnInst &RI); - Instruction *visitInsertValueInst(InsertValueInst &IV); - Instruction *visitInsertElementInst(InsertElementInst &IE); - Instruction *visitExtractElementInst(ExtractElementInst &EI); - Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); - Instruction *visitExtractValueInst(ExtractValueInst &EV); - Instruction *visitLandingPadInst(LandingPadInst &LI); - - // visitInstruction - Specify what to return for unhandled instructions... - Instruction *visitInstruction(Instruction &I) { return nullptr; } - -private: - bool ShouldChangeType(Type *From, Type *To) const; - Value *dyn_castNegVal(Value *V) const; - Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; - Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl &NewIndices); - Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - - /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated and is interesting to optimize out. If - /// the cast can be eliminated by some other simple transformation, we prefer - /// to do the simplification first. - bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, - Type *Ty); - - Instruction *visitCallSite(CallSite CS); - Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL); - bool transformConstExprCastCall(CallSite CS); - Instruction *transformCallThroughTrampoline(CallSite CS, - IntrinsicInst *Tramp); - Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, - bool DoXform = true); - Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI); - Value *EmitGEPOffset(User *GEP); - Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); - Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask); - -public: - // InsertNewInstBefore - insert an instruction New before instruction Old - // in the program. Add the new instruction to the worklist. 
- // - Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && !New->getParent() && - "New instruction already inserted into a basic block!"); - BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst - Worklist.Add(New); - return New; - } - - // InsertNewInstWith - same as InsertNewInstBefore, but also sets the - // debug loc. - // - Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { - New->setDebugLoc(Old.getDebugLoc()); - return InsertNewInstBefore(New, Old); - } - - // ReplaceInstUsesWith - This method is to be used when an instruction is - // found to be dead, replacable with another preexisting expression. Here - // we add all uses of I to the worklist, replace all uses of I with the new - // value, then return I, so that the inst combiner will know that I was - // modified. - // - Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. - - // If we are replacing the instruction with itself, this must be in a - // segment of unreachable code, so just clobber the instruction. - if (&I == V) - V = UndefValue::get(I.getType()); - - DEBUG(dbgs() << "IC: Replacing " << I << "\n" - " with " << *V << '\n'); - - I.replaceAllUsesWith(V); - return &I; - } - - // EraseInstFromFunction - When dealing with an instruction that has side - // effects or produces a void value, we can't rely on DCE to delete the - // instruction. Instead, visit methods should return the value returned by - // this function. - Instruction *EraseInstFromFunction(Instruction &I) { - DEBUG(dbgs() << "IC: ERASE " << I << '\n'); - - assert(I.use_empty() && "Cannot erase instruction that is used!"); - // Make sure that we reprocess all operands now that we reduced their - // use counts. - if (I.getNumOperands() < 8) { - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast(*i)) - Worklist.Add(Op); - } - Worklist.Remove(&I); - I.eraseFromParent(); - MadeIRChange = true; - return nullptr; // Don't do anything with FI - } - - void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth = 0, Instruction *CxtI = nullptr) const { - return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, - AT, CxtI, DT); - } - - bool MaskedValueIsZero(Value *V, const APInt &Mask, - unsigned Depth = 0, - Instruction *CxtI = nullptr) const { - return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AT, CxtI, DT); - } - unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0, - Instruction *CxtI = nullptr) const { - return llvm::ComputeNumSignBits(Op, DL, Depth, AT, CxtI, DT); - } - -private: - /// SimplifyAssociativeOrCommutative - This performs a few simplifications for - /// operators which are associative or commutative. - bool SimplifyAssociativeOrCommutative(BinaryOperator &I); - - /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations - /// which some other binary operation distributes over either by factorizing - /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this - /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is - /// a win). Returns the simplified value, or null if it didn't simplify. - Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); - - /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value - /// based on the demanded bits. 
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - Instruction *CxtI = nullptr); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0); - /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded - /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne); - - /// SimplifyDemandedInstructionBits - Inst is an integer instruction that - /// SimplifyDemandedBits knows about. See if the instruction has any - /// properties that allow us to simplify its operands. - bool SimplifyDemandedInstructionBits(Instruction &Inst); - - Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt &UndefElts, unsigned Depth = 0); - - Value *SimplifyVectorOp(BinaryOperator &Inst); - - // FoldOpIntoPhi - Given a binary operator, cast instruction, or select - // which has a PHI node as operand #0, see if we can fold the instruction - // into the PHI (which is only possible if all operands to the PHI are - // constants). - // - Instruction *FoldOpIntoPhi(Instruction &I); - - // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" - // operator and they all are only used by the PHI, PHI together their - // inputs, and do the operation once, to the result of the PHI. - Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); - Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); - Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, - ConstantInt *AndRHS, BinaryOperator &TheAnd); - - Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, - bool isSub, Instruction &I); - Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, - bool Inside); - Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); - Instruction *MatchBSwap(BinaryOperator &I); - bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyMemTransfer(MemIntrinsic *MI); - Instruction *SimplifyMemSet(MemSetInst *MI); - - Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); - - /// Descale - Return a value X such that Val = X * Scale, or null if none. If - /// the multiplication is known not to overflow then NoSignedWrap is set. - Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); -}; - -} // end namespace llvm. 
- -#undef DEBUG_TYPE - -#endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 902b640..752f79d 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "InstCombine.h" +#include "InstCombineInternal.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/DataLayout.h" @@ -751,8 +751,7 @@ Value *FAddCombine::createNaryFAdd return LastVal; } -Value *FAddCombine::createFSub - (Value *Opnd0, Value *Opnd1) { +Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); @@ -760,15 +759,14 @@ Value *FAddCombine::createFSub } Value *FAddCombine::createFNeg(Value *V) { - Value *Zero = cast(ConstantFP::get(V->getType(), 0.0)); + Value *Zero = cast(ConstantFP::getZeroValueForNegation(V->getType())); Value *NewV = createFSub(Zero, V); if (Instruction *I = dyn_cast(NewV)) createInstPostProc(I, true); // fneg's don't receive instruction numbers. return NewV; } -Value *FAddCombine::createFAdd - (Value *Opnd0, Value *Opnd1) { +Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); @@ -789,8 +787,7 @@ Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { return V; } -void FAddCombine::createInstPostProc(Instruction *NewInstr, - bool NoNumber) { +void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) { NewInstr->setDebugLoc(Instr->getDebugLoc()); // Keep track of the number of instruction created. @@ -840,8 +837,7 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { // "fmul V, C" false // // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. -Value *FAddCombine::createAddendVal - (const FAddend &Opnd, bool &NeedNeg) { +Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) { const FAddendCoef &Coeff = Opnd.getCoef(); if (Opnd.isConstant()) { @@ -894,7 +890,6 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero, /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. -/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI) { // There are different heuristics we can use for this. Here are some simple @@ -918,42 +913,25 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, ComputeNumSignBits(RHS, 0, CxtI) > 1) return true; - if (IntegerType *IT = dyn_cast(LHS->getType())) { - int BitWidth = IT->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); - - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); - - // Addition of two 2's compliment numbers having opposite signs will never - // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) - return true; - - // Check if carry bit of addition will not cause overflow. 
- if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) - return true; - if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) - return true; - } - return false; -} + unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); -/// WillNotOverflowUnsignedAdd - Return true if we can prove that: -/// (zext (add LHS, RHS)) === (add (zext LHS), (zext RHS)) -bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, - Instruction *CxtI) { - // There are different heuristics we can use for this. Here is a simple one. - // If the sign bit of LHS and that of RHS are both zero, no unsigned wrap. - bool LHSKnownNonNegative, LHSKnownNegative; - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0, AT, CxtI, DT); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0, AT, CxtI, DT); - if (LHSKnownNonNegative && RHSKnownNonNegative) + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); + + // Addition of two 2's compliment numbers having opposite signs will never + // overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + return true; + + // Check if carry bit of addition will not cause overflow. + if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) + return true; + if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) return true; return false; @@ -972,24 +950,22 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, ComputeNumSignBits(RHS, 0, CxtI) > 1) return true; - if (IntegerType *IT = dyn_cast(LHS->getType())) { - unsigned BitWidth = IT->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); + unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI); - // Subtraction of two 2's compliment numbers having identical signs will - // never overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1])) - return true; + // Subtraction of two 2's compliment numbers having identical signs will + // never overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1])) + return true; - // TODO: implement logic similar to checkRippleForAdd - } + // TODO: implement logic similar to checkRippleForAdd return false; } @@ -1000,8 +976,8 @@ bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, // If the LHS is negative and the RHS is non-negative, no unsigned wrap. 
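// ---- Editorial sketch (not part of the patch) -----------------------------
// The fact WillNotOverflowSignedAdd leans on above: two's-complement addends
// with opposite sign bits can never overflow. An exhaustive 8-bit check using
// APInt::sadd_ov; the function name is hypothetical.
#include "llvm/ADT/APInt.h"
#include <cassert>

static void checkOppositeSignAddNeverOverflows() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      llvm::APInt LHS(8, A), RHS(8, B);
      if (LHS.isNegative() == RHS.isNegative())
        continue; // only opposite-sign pairs are claimed safe
      bool Overflow;
      (void)LHS.sadd_ov(RHS, Overflow);
      assert(!Overflow && "opposite-sign add overflowed?");
    }
}
// ----------------------------------------------------------------------------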
bool LHSKnownNonNegative, LHSKnownNegative; bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0, AT, CxtI, DT); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0, AT, CxtI, DT); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI); if (LHSKnownNegative && RHSKnownNonNegative) return true; @@ -1077,7 +1053,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, TLI, DT, AT)) + I.hasNoUnsignedWrap(), DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc @@ -1335,7 +1311,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS, &I)) { + if (!I.hasNoUnsignedWrap() && + computeOverflowForUnsignedAdd(LHS, RHS, &I) == + OverflowResult::NeverOverflows) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -1350,8 +1328,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, - TLI, DT, AT)) + if (Value *V = + SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); if (isa(RHS)) { @@ -1529,7 +1507,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, TLI, DT, AT)) + I.hasNoUnsignedWrap(), DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1717,10 +1695,18 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, - TLI, DT, AT)) + if (Value *V = + SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); + // fsub nsz 0, X ==> fsub nsz -0.0, X + if (I.getFastMathFlags().noSignedZeros() && match(Op0, m_Zero())) { + // Subtraction from -0.0 is the canonical form of fneg. + Instruction *NewI = BinaryOperator::CreateFNeg(Op1); + NewI->copyFastMathFlags(&I); + return NewI; + } + if (isa(Op0)) if (SelectInst *SI = dyn_cast(Op1)) if (Instruction *NV = FoldOpIntoSelect(I, SI)) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 55ebced..863eeaf 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "InstCombine.h" +#include "InstCombineInternal.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Intrinsics.h" @@ -22,30 +22,12 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// isFreeToInvert - Return true if the specified value is free to invert (apply -/// ~ to). This happens in cases where the ~ can be eliminated. -static inline bool isFreeToInvert(Value *V) { - // ~(~(X)) -> X. - if (BinaryOperator::isNot(V)) - return true; - - // Constants can be considered to be not'ed values. 
-  if (isa<Constant>(V))
-    return true;
-
-  // Compares can be inverted if they have a single use.
-  if (CmpInst *CI = dyn_cast<CmpInst>(V))
-    return CI->hasOneUse();
-
-  return false;
-}
-
 static inline Value *dyn_castNotVal(Value *V) {
   // If this is not(not(x)) don't return that this is a not: we want the two
   // not's to be folded first.
   if (BinaryOperator::isNot(V)) {
     Value *Operand = BinaryOperator::getNotArgument(V);
-    if (!isFreeToInvert(Operand))
+    if (!IsFreeToInvert(Operand, Operand->hasOneUse()))
       return Operand;
   }
 
@@ -117,6 +99,61 @@ static Value *getFCmpValue(bool isordered, unsigned code,
   return Builder->CreateFCmp(Pred, LHS, RHS);
 }
 
+/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B))
+/// \param I Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+///         null pointer if no transformation was made.
+Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+
+  // Can't do vectors.
+  if (I.getType()->isVectorTy()) return nullptr;
+
+  // Can only do bitwise ops.
+  unsigned Op = I.getOpcode();
+  if (Op != Instruction::And && Op != Instruction::Or &&
+      Op != Instruction::Xor)
+    return nullptr;
+
+  Value *OldLHS = I.getOperand(0);
+  Value *OldRHS = I.getOperand(1);
+  ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS);
+  ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS);
+  IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS);
+  IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS);
+  bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap);
+  bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap);
+
+  if (!IsBswapLHS && !IsBswapRHS)
+    return nullptr;
+
+  if (!IsBswapLHS && !ConstLHS)
+    return nullptr;
+
+  if (!IsBswapRHS && !ConstRHS)
+    return nullptr;
+
+  /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+  /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+  Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) :
+                  Builder->getInt(ConstLHS->getValue().byteSwap());
+
+  Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
+                  Builder->getInt(ConstRHS->getValue().byteSwap());
+
+  Value *BinOp = nullptr;
+  if (Op == Instruction::And)
+    BinOp = Builder->CreateAnd(NewLHS, NewRHS);
+  else if (Op == Instruction::Or)
+    BinOp = Builder->CreateOr(NewLHS, NewRHS);
+  else //if (Op == Instruction::Xor)
+    BinOp = Builder->CreateXor(NewLHS, NewRHS);
+
+  Module *M = I.getParent()->getParent()->getParent();
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+  return Builder->CreateCall(F, BinOp);
+}
+
 // OptAndOp - This handles expressions of the form ((val OP C1) & C2).  Where
 // the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
 // guaranteed to be a binary operator.
@@ -785,6 +822,62 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
   return nullptr;
 }
 
+/// Try to fold a signed range checked with lower bound 0 to an unsigned icmp.
+/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
+/// If \p Inverted is true then the check is for the inverted range, e.g.
+/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
+Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
+                                        bool Inverted) {
+  // Check the lower range comparison, e.g. x >= 0
+  // InstCombine already ensured that if there is a constant it's on the RHS.
+  ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
+  if (!RangeStart)
+    return nullptr;
+
+  ICmpInst::Predicate Pred0 = (Inverted ?
Cmp0->getInversePredicate() : + Cmp0->getPredicate()); + + // Accept x > -1 or x >= 0 (after potentially inverting the predicate). + if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) || + (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero()))) + return nullptr; + + ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() : + Cmp1->getPredicate()); + + Value *Input = Cmp0->getOperand(0); + Value *RangeEnd; + if (Cmp1->getOperand(0) == Input) { + // For the upper range compare we have: icmp x, n + RangeEnd = Cmp1->getOperand(1); + } else if (Cmp1->getOperand(1) == Input) { + // For the upper range compare we have: icmp n, x + RangeEnd = Cmp1->getOperand(0); + Pred1 = ICmpInst::getSwappedPredicate(Pred1); + } else { + return nullptr; + } + + // Check the upper range comparison, e.g. x < n + ICmpInst::Predicate NewPred; + switch (Pred1) { + case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break; + case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break; + default: return nullptr; + } + + // This simplification is only valid if the upper range is not negative. + bool IsNegative, IsNotNegative; + ComputeSignBit(RangeEnd, IsNotNegative, IsNegative, /*Depth=*/0, Cmp1); + if (!IsNotNegative) + return nullptr; + + if (Inverted) + NewPred = ICmpInst::getInversePredicate(NewPred); + + return Builder->CreateICmp(NewPred, Input, RangeEnd); +} + /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); @@ -807,6 +900,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, true, Builder)) return V; + // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/false)) + return V; + + // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n + if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). 
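// ---- Editorial sketch (not part of the patch) -----------------------------
// The C-level identity behind simplifyRangeCheck above: for signed x and a
// non-negative bound n, (x >= 0 && x < n) == ((unsigned)x < (unsigned)n),
// because a negative x reinterprets as a value above any non-negative n.
// Both helper names are hypothetical, chosen for illustration.
#include <cassert>
#include <climits>

static bool rangeCheckSigned(int X, int N) { return X >= 0 && X < N; }
static bool rangeCheckUnsigned(int X, int N) {
  return (unsigned)X < (unsigned)N;
}

static void checkRangeFold() {
  const int Probes[] = {INT_MIN, -7, -1, 0, 1, 41, 42, 43, INT_MAX};
  const int Bounds[] = {0, 1, 42, INT_MAX}; // bounds must be non-negative
  for (int X : Probes)
    for (int N : Bounds)
      assert(rangeCheckSigned(X, N) == rangeCheckUnsigned(X, N));
}
// ----------------------------------------------------------------------------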
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); @@ -1108,7 +1209,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AT)) + if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); // (A|B)&(A|C) -> A|(B&C) etc @@ -1120,6 +1221,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + if (Value *V = SimplifyBSwap(I)) + return ReplaceInstUsesWith(I, V); + if (ConstantInt *AndRHS = dyn_cast(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -1605,15 +1709,15 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *Mask = nullptr; Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AT, CxtI, DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AT, CxtI, DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), - false, 0, AT, CxtI, DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), - false, 0, AT, CxtI, DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI, + DT) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI, + DT)) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } @@ -1724,6 +1828,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); } + // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n + if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/true)) + return V; + + // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n + if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). 
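// ---- Editorial sketch (not part of the patch) -----------------------------
// The identity SimplifyBSwap exploits: bitwise ops commute with byte swaps,
// op(bswap(x), bswap(y)) == bswap(op(x, y)), and a constant operand can be
// pre-swapped at compile time instead of swapped at run time. bswap32 is a
// hypothetical stand-in for llvm.bswap.i32.
#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
}

static void checkBSwapIdentity() {
  const uint32_t X = 0x12345678u, Y = 0xdeadbeefu, C = 0x000000ffu;
  assert((bswap32(X) & bswap32(Y)) == bswap32(X & Y));
  assert((bswap32(X) | bswap32(Y)) == bswap32(X | Y));
  assert((bswap32(X) ^ bswap32(Y)) == bswap32(X ^ Y));
  // OP(BSWAP(x), CONSTANT) -> BSWAP(OP(x, BSWAP(CONSTANT))), as in the hunk.
  assert((bswap32(X) & C) == bswap32(X & bswap32(C)));
}
// ----------------------------------------------------------------------------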
if (!LHSCst || !RHSCst) return nullptr; @@ -2033,7 +2145,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AT)) + if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); // (A&B)|(A&C) -> A&(B|C) etc @@ -2045,6 +2157,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + if (Value *V = SimplifyBSwap(I)) + return ReplaceInstUsesWith(I, V); + if (ConstantInt *RHS = dyn_cast(Op1)) { ConstantInt *C1 = nullptr; Value *X = nullptr; // (X & C1) | C2 --> (X | C2) & (C1|C2) @@ -2305,11 +2420,34 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SwappedForXor) std::swap(Op0, Op1); - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) + { + ICmpInst *LHS = dyn_cast(Op0); + ICmpInst *RHS = dyn_cast(Op1); + if (LHS && RHS) if (Value *Res = FoldOrOfICmps(LHS, RHS, &I)) return ReplaceInstUsesWith(I, Res); + // TODO: Make this recursive; it's a little tricky because an arbitrary + // number of 'or' instructions might have to be created. + Value *X, *Y; + if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { + if (auto *Cmp = dyn_cast(X)) + if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) + return ReplaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + if (auto *Cmp = dyn_cast(Y)) + if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) + return ReplaceInstUsesWith(I, Builder->CreateOr(Res, X)); + } + if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { + if (auto *Cmp = dyn_cast(X)) + if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) + return ReplaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + if (auto *Cmp = dyn_cast(Y)) + if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) + return ReplaceInstUsesWith(I, Builder->CreateOr(Res, X)); + } + } + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) @@ -2394,7 +2532,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return ReplaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AT)) + if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AC)) return ReplaceInstUsesWith(I, V); // (A&B)^(A&C) -> A&(B^C) etc @@ -2406,6 +2544,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + if (Value *V = SimplifyBSwap(I)) + return ReplaceInstUsesWith(I, V); + // Is this a ~ operation? 
if (Value *NotOp = dyn_castNotVal(&I)) { if (BinaryOperator *Op0I = dyn_cast(NotOp)) { @@ -2426,8 +2567,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(X & Y) --> (~X | ~Y) - De Morgan's Law // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (isFreeToInvert(Op0I->getOperand(0)) && - isFreeToInvert(Op0I->getOperand(1))) { + if (IsFreeToInvert(Op0I->getOperand(0), + Op0I->getOperand(0)->hasOneUse()) && + IsFreeToInvert(Op0I->getOperand(1), + Op0I->getOperand(1)->hasOneUse())) { Value *NotX = Builder->CreateNot(Op0I->getOperand(0), "notlhs"); Value *NotY = @@ -2445,15 +2588,16 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - - if (ConstantInt *RHS = dyn_cast(Op1)) { - if (RHS->isOne() && Op0->hasOneUse()) + if (Constant *RHS = dyn_cast(Op1)) { + if (RHS->isAllOnesValue() && Op0->hasOneUse()) // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B if (CmpInst *CI = dyn_cast(Op0)) return CmpInst::Create(CI->getOpcode(), CI->getInversePredicate(), CI->getOperand(0), CI->getOperand(1)); + } + if (ConstantInt *RHS = dyn_cast(Op1)) { // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp). if (CastInst *Op0C = dyn_cast(Op0)) { if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 87e49a1..05e7162 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "InstCombine.h" +#include "InstCombineInternal.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" using namespace llvm; using namespace PatternMatch; @@ -59,8 +61,8 @@ static Type *reduceToSingleValueType(Type *T) { } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AT, MI, DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AT, MI, DT); + unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT); + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -118,15 +120,14 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If the memcpy has metadata describing the members, see if we can // get the TBAA tag describing our copy. 
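// ---- Editorial sketch (not part of the patch) -----------------------------
// The De Morgan rewrites applied above, ~(x & y) == (~x | ~y) and
// ~(x | y) == (~x & ~y), verified exhaustively on 8-bit values. The
// IsFreeToInvert guard only decides when the rewrite is profitable; the
// identity itself holds unconditionally.
#include <cassert>
#include <cstdint>

static void checkDeMorgan() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y) {
      const uint8_t A = uint8_t(X), B = uint8_t(Y);
      assert(uint8_t(~(A & B)) == uint8_t(~A | ~B));
      assert(uint8_t(~(A | B)) == uint8_t(~A & ~B));
    }
}
// ----------------------------------------------------------------------------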
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { - if (M->getNumOperands() == 3 && - M->getOperand(0) && - isa(M->getOperand(0)) && - cast(M->getOperand(0))->isNullValue() && + if (M->getNumOperands() == 3 && M->getOperand(0) && + mdconst::hasa(M->getOperand(0)) && + mdconst::extract(M->getOperand(0))->isNullValue() && M->getOperand(1) && - isa(M->getOperand(1)) && - cast(M->getOperand(1))->getValue() == Size && - M->getOperand(2) && - isa(M->getOperand(2))) + mdconst::hasa(M->getOperand(1)) && + mdconst::extract(M->getOperand(1))->getValue() == + Size && + M->getOperand(2) && isa(M->getOperand(2))) CopyMD = cast(M->getOperand(2)); } } @@ -155,7 +156,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { - unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AT, MI, DT); + unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT); if (MI->getAlignment() < Alignment) { MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Alignment, false)); @@ -352,48 +353,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - IntegerType *IT = cast(II->getArgOperand(0)->getType()); - uint32_t BitWidth = IT->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II); - bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; - bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; - - if (LHSKnownNegative || LHSKnownPositive) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II); - bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; - bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; - if (LHSKnownNegative && RHSKnownNegative) { - // The sign bit is set in both cases: this MUST overflow. - // Create a simple add instruction, and insert it into the struct. - Value *Add = Builder->CreateAdd(LHS, RHS); - Add->takeName(&CI); - Constant *V[] = { - UndefValue::get(LHS->getType()), - ConstantInt::getTrue(II->getContext()) - }; - StructType *ST = cast(II->getType()); - Constant *Struct = ConstantStruct::get(ST, V); - return InsertValueInst::Create(Struct, Add, 0); - } - - if (LHSKnownPositive && RHSKnownPositive) { - // The sign bit is clear in both cases: this CANNOT overflow. - // Create a simple add instruction, and insert it into the struct. 
- Value *Add = Builder->CreateNUWAdd(LHS, RHS); - Add->takeName(&CI); - Constant *V[] = { - UndefValue::get(LHS->getType()), - ConstantInt::getFalse(II->getContext()) - }; - StructType *ST = cast(II->getType()); - Constant *Struct = ConstantStruct::get(ST, V); - return InsertValueInst::Create(Struct, Add, 0); - } - } + OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); + if (OR == OverflowResult::NeverOverflows) + return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); + if (OR == OverflowResult::AlwaysOverflows) + return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); } // FALL THROUGH uadd into sadd case Intrinsic::sadd_with_overflow: @@ -413,13 +377,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (ConstantInt *RHS = dyn_cast(II->getArgOperand(1))) { // X + 0 -> {X, false} if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getArgOperand(0)->getType()), - ConstantInt::getFalse(II->getContext()) - }; - Constant *Struct = - ConstantStruct::get(cast(II->getType()), V); - return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + return CreateOverflowTuple(II, II->getArgOperand(0), false, + /*ReUseName*/false); } } @@ -428,65 +387,43 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); if (WillNotOverflowSignedAdd(LHS, RHS, II)) { - Value *Add = Builder->CreateNSWAdd(LHS, RHS); - Add->takeName(&CI); - Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()}; - StructType *ST = cast(II->getType()); - Constant *Struct = ConstantStruct::get(ST, V); - return InsertValueInst::Create(Struct, Add, 0); + return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); } } break; case Intrinsic::usub_with_overflow: - case Intrinsic::ssub_with_overflow: + case Intrinsic::ssub_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); // undef - X -> undef // X - undef -> undef - if (isa(II->getArgOperand(0)) || - isa(II->getArgOperand(1))) + if (isa(LHS) || isa(RHS)) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - if (ConstantInt *RHS = dyn_cast(II->getArgOperand(1))) { + if (ConstantInt *ConstRHS = dyn_cast(RHS)) { // X - 0 -> {X, false} - if (RHS->isZero()) { - Constant *V[] = { - UndefValue::get(II->getArgOperand(0)->getType()), - ConstantInt::getFalse(II->getContext()) - }; - Constant *Struct = - ConstantStruct::get(cast(II->getType()), V); - return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + if (ConstRHS->isZero()) { + return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); + } + } + if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { + if (WillNotOverflowSignedSub(LHS, RHS, II)) { + return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); + } + } else { + if (WillNotOverflowUnsignedSub(LHS, RHS, II)) { + return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); } } break; + } case Intrinsic::umul_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - unsigned BitWidth = cast(LHS->getType())->getBitWidth(); - - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II); - - // Get the largest possible values for each operand. 
- APInt LHSMax = ~LHSKnownZero; - APInt RHSMax = ~RHSKnownZero; - - // If multiplying the maximum values does not overflow then we can turn - // this into a plain NUW mul. - bool Overflow; - LHSMax.umul_ov(RHSMax, Overflow); - if (!Overflow) { - Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); - Constant *V[] = { - UndefValue::get(LHS->getType()), - Builder->getFalse() - }; - Constant *Struct = ConstantStruct::get(cast(II->getType()),V); - return InsertValueInst::Create(Struct, Mul, 0); - } + OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); + if (OR == OverflowResult::NeverOverflows) + return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); + if (OR == OverflowResult::AlwaysOverflows) + return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. @@ -509,13 +446,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X * 1 -> {X, false} if (RHSI->equalsInt(1)) { - Constant *V[] = { - UndefValue::get(II->getArgOperand(0)->getType()), - ConstantInt::getFalse(II->getContext()) - }; - Constant *Struct = - ConstantStruct::get(cast(II->getType()), V); - return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); + return CreateOverflowTuple(II, II->getArgOperand(0), false, + /*ReUseName*/false); + } + } + if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + if (WillNotOverflowSignedMul(LHS, RHS, II)) { + return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); } } break; @@ -606,8 +544,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: // Turn PPC lvx -> load if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, - DL, AT, II, DT) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); @@ -623,8 +561,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, - DL, AT, II, DT) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); @@ -638,12 +576,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr, false, 1); } + case Intrinsic::ppc_qpx_qvlfs: + // Turn PPC QPX qvlfs -> load if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + 16) { + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); + return new LoadInst(Ptr); + } + break; + case Intrinsic::ppc_qpx_qvlfd: + // Turn PPC QPX qvlfd -> load if the pointer is known aligned. 
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >= + 32) { + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); + return new LoadInst(Ptr); + } + break; + case Intrinsic::ppc_qpx_qvstfs: + // Turn PPC QPX qvstfs -> store if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >= + 16) { + Type *OpPtrTy = + PointerType::getUnqual(II->getArgOperand(0)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + return new StoreInst(II->getArgOperand(0), Ptr); + } + break; + case Intrinsic::ppc_qpx_qvstfd: + // Turn PPC QPX qvstfd -> store if the pointer is known aligned. + if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >= + 32) { + Type *OpPtrTy = + PointerType::getUnqual(II->getArgOperand(0)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + return new StoreInst(II->getArgOperand(0), Ptr); + } + break; case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. - if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, - DL, AT, II, DT) >= 16) { + if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >= + 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); @@ -774,7 +750,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: eventually we should lower this intrinsic to IR if (auto CIWidth = dyn_cast(II->getArgOperand(2))) { if (auto CIStart = dyn_cast(II->getArgOperand(3))) { - if (CIWidth->equalsInt(64) && CIStart->isZero()) { + unsigned Index = CIStart->getZExtValue(); + // From AMD documentation: "a value of zero in the field length is + // defined as length of 64". + unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); + + // From AMD documentation: "If the sum of the bit index + length field + // is greater than 64, the results are undefined". + + // Note that both field index and field length are 8-bit quantities. + // Since variables 'Index' and 'Length' are unsigned values + // obtained from zero-extending field index and field length + // respectively, their sum should never wrap around. 
+        if ((Index + Length) > 64)
+          return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+        if (Length == 64 && Index == 0) {
           Value *Vec = II->getArgOperand(1);
           Value *Undef = UndefValue::get(Vec->getType());
           const uint32_t Mask[] = { 0, 2 };
@@ -988,7 +979,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::arm_neon_vst2lane:
   case Intrinsic::arm_neon_vst3lane:
   case Intrinsic::arm_neon_vst4lane: {
-    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AT, II, DT);
+    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
     unsigned AlignArg = II->getNumArgOperands() - 1;
     ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
     if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1128,7 +1119,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         cast<Constant>(RHS)->isNullValue()) {
       LoadInst* LI = cast<LoadInst>(LHS);
       if (isValidAssumeForContext(II, LI, DL, DT)) {
-        MDNode* MD = MDNode::get(II->getContext(), ArrayRef<Value *>());
+        MDNode *MD = MDNode::get(II->getContext(), None);
         LI->setMetadata(LLVMContext::MD_nonnull, MD);
         return EraseInstFromFunction(*II);
       }
@@ -1145,6 +1136,48 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::experimental_gc_relocate: {
+    // Translate facts known about a pointer before relocating into
+    // facts about the relocate value, while being careful to
+    // preserve relocation semantics.
+    GCRelocateOperands Operands(II);
+    Value *DerivedPtr = Operands.derivedPtr();
+
+    // Remove the relocation if unused; note that this check is required
+    // to prevent the cases below from looping forever.
+    if (II->use_empty())
+      return EraseInstFromFunction(*II);
+
+    // Undef is undef, even after relocation.
+    // TODO: provide a hook for this in GCStrategy.  This is clearly legal for
+    // most practical collectors, but there was discussion in the review thread
+    // about whether it was legal for all possible collectors.
+    if (isa<UndefValue>(DerivedPtr))
+      return ReplaceInstUsesWith(*II, DerivedPtr);
+
+    // The relocation of null will be null for almost any collector.
+    // TODO: provide a hook for this in GCStrategy.  There might be some weird
+    // collector this property does not hold for.
+    if (isa<ConstantPointerNull>(DerivedPtr))
+      return ReplaceInstUsesWith(*II, DerivedPtr);
+
+    // isKnownNonNull -> nonnull attribute
+    if (isKnownNonNull(DerivedPtr))
+      II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+
+    // isDereferenceablePointer -> deref attribute
+    if (DerivedPtr->isDereferenceablePointer(DL)) {
+      if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+        uint64_t Bytes = A->getDereferenceableBytes();
+        II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+      }
+    }
+
+    // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
+    // Canonicalize on the type from the uses to the defs
+
+    // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
+  }
   }
 
   return visitCallSite(II);
@@ -1165,6 +1198,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   if (!CI->isLosslessCast())
     return false;
 
+  // If this is a GC intrinsic, avoid munging types.  We need types for
+  // statepoint reconstruction in SelectionDAG.
+  // TODO: This is probably something which should be expanded to all
+  // intrinsics since the entire point of intrinsics is that
+  // they are understandable by the optimizer.
+  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
+    return false;
+
   // The size of ByVal or InAlloca arguments is derived from the type, so we
   // can't change to a type with a different size.  If the size were
   // passed explicitly we could avoid this check.
@@ -1188,7 +1229,11 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
   if (!CI->getCalledFunction()) return nullptr;
 
-  if (Value *With = Simplifier->optimizeCall(CI)) {
+  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+    ReplaceInstUsesWith(*From, With);
+  };
+  LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+  if (Value *With = Simplifier.optimizeCall(CI)) {
     ++NumSimplified;
     return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
   }
@@ -1380,6 +1425,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
   if (!Callee)
     return false;
+  // The prototypes of thunks are a lie; don't try to directly call such
+  // functions.
+  if (Callee->hasFnAttribute("thunk"))
+    return false;
   Instruction *Caller = CS.getInstruction();
   const AttributeSet &CallerPAL = CS.getAttributes();
@@ -1397,7 +1446,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (NewRetTy->isStructTy())
     return false; // TODO: Handle multiple return values.
 
-  if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
     if (Callee->isDeclaration())
       return false;   // Cannot transform this return value.
@@ -1432,12 +1481,21 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   unsigned NumActualArgs = CS.arg_size();
   unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
 
+  // Prevent us turning:
+  // declare void @takes_i32_inalloca(i32* inalloca)
+  //  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
+  //
+  // into:
+  //  call void @takes_i32_inalloca(i32* null)
+  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+    return false;
+
   CallSite::arg_iterator AI = CS.arg_begin();
   for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
     Type *ParamTy = FT->getParamType(i);
     Type *ActTy = (*AI)->getType();
 
-    if (!CastInst::isBitCastable(ActTy, ParamTy))
+    if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
       return false;   // Cannot transform this parameter value.
 
     if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
@@ -1532,7 +1590,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     if ((*AI)->getType() == ParamTy) {
       Args.push_back(*AI);
     } else {
-      Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
+      Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
     }
 
     // Add any parameter attributes.
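Aside on the tryOptimizeCall hunk above: LibCallSimplifier is now constructed
on the fly and handed a replacement callback (InstCombineRAUW) rather than
living as a long-lived member that mutates IR on its own. A minimal standalone
C++ sketch of that callback-injection shape follows; the names Inst,
Simplifier, and run are invented for illustration and are not LLVM API:

    #include <functional>
    #include <iostream>
    #include <string>

    struct Inst { std::string Name; };

    class Simplifier {
      // The client supplies the "replace all uses" action, so the
      // simplifier reports rewrites instead of editing client state itself.
      std::function<void(Inst &, const std::string &)> Replacer;

    public:
      explicit Simplifier(std::function<void(Inst &, const std::string &)> R)
          : Replacer(std::move(R)) {}

      bool run(Inst &I) {
        if (I.Name == "strlen") { // pretend this call is foldable
          Replacer(I, "a constant");
          return true;
        }
        return false;
      }
    };

    int main() {
      Inst I{"strlen"};
      // Plays the role of the InstCombineRAUW lambda in the hunk above.
      Simplifier S([](Inst &From, const std::string &With) {
        std::cout << "replace " << From.Name << " with " << With << "\n";
      });
      S.run(I); // prints: replace strlen with a constant
    }

Passing the replacer in the constructor keeps the simplifier usable from
passes with different bookkeeping (InstCombine's worklist here) without
coupling it to any one of them.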
@@ -1603,7 +1661,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Value *NV = NC;
   if (OldRetTy != NV->getType() && !Caller->use_empty()) {
     if (!NV->getType()->isVoidTy()) {
-      NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
+      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
       NC->setDebugLoc(Caller->getDebugLoc());
 
       // If this is an invoke instruction, we should insert it after the first
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index aba77bb..3e2b719 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -1064,6 +1064,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   Value *Src = CI.getOperand(0);
   Type *SrcTy = Src->getType(), *DestTy = CI.getType();
 
+  // If we know that the value being extended is positive, we can use a zext
+  // instead.
+  bool KnownZero, KnownOne;
+  ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+  if (KnownZero) {
+    Value *ZExt = Builder->CreateZExt(Src, DestTy);
+    return ReplaceInstUsesWith(CI, ZExt);
+  }
+
   // Attempt to extend the entire input expression tree to the destination
   // type.   Only do this if the dest type is a simple type, don't convert the
   // expression tree to something weird like i93 unless the source is also
@@ -1269,6 +1278,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
       // type of OpI doesn't enter into things at all.  We simply evaluate
       // in whichever source type is larger, then convert to the
       // destination type.
+      if (SrcWidth == OpWidth)
+        break;
       if (LHSWidth < SrcWidth)
         LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType());
       else if (RHSWidth <= SrcWidth)
@@ -1330,22 +1341,57 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
   return commonCastTransforms(CI);
 }
 
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+    return nullptr;
+  Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+  Value *SrcI = OpI->getOperand(0);
+  Type *FITy = FI.getType();
+  Type *OpITy = OpI->getType();
+  Type *SrcTy = SrcI->getType();
+  bool IsInputSigned = isa<SIToFPInst>(OpI);
+  bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+  // We can safely assume the conversion won't overflow the output range,
+  // because (for example) (uint8_t)18293.f is undefined behavior.
+
+  // Since we can assume the conversion won't overflow, our decision as to
+  // whether the input will fit in the float should depend on the minimum
+  // of the input range and output range.
+
+  // This means this is also safe for a signed input and unsigned output, since
+  // a negative input would lead to undefined behavior.
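+  // For example: fptosi(sitofp i16 %x to float) to i32 has InputSize = 15
+  // and OutputSize = 31, so ActualSize = 15 fits float's 24-bit mantissa
+  // and the pair folds to a plain sext of %x below; a signed i32 round-trip
+  // through float has ActualSize = 31 > 24 and is rejected.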
+  int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+  int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+  int ActualSize = std::min(InputSize, OutputSize);
+
+  if (ActualSize <= OpITy->getFPMantissaWidth()) {
+    if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+      if (IsInputSigned && IsOutputSigned)
+        return new SExtInst(SrcI, FITy);
+      return new ZExtInst(SrcI, FITy);
+    }
+    if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+      return new TruncInst(SrcI, FITy);
+    if (SrcTy == FITy)
+      return ReplaceInstUsesWith(FI, SrcI);
+    return new BitCastInst(SrcI, FITy);
+  }
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
   Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
   if (!OpI)
     return commonCastTransforms(FI);
 
-  // fptoui(uitofp(X)) --> X
-  // fptoui(sitofp(X)) --> X
-  // This is safe if the intermediate type has enough bits in its mantissa to
-  // accurately represent all values of X. For example, do not do this with
-  // i64->float->i64. This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui.
-  if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
-      OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
-      OpI->getType()->getFPMantissaWidth())
-    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+  if (Instruction *I = FoldItoFPtoI(FI))
+    return I;
 
   return commonCastTransforms(FI);
 }
@@ -1355,17 +1401,8 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
   if (!OpI)
     return commonCastTransforms(FI);
 
-  // fptosi(sitofp(X)) --> X
-  // fptosi(uitofp(X)) --> X
-  // This is safe if the intermediate type has enough bits in its mantissa to
-  // accurately represent all values of X. For example, do not do this with
-  // i64->float->i64. This is also safe for sitofp case, because any negative
-  // 'X' value would cause an undefined result for the fptoui.
-  if ((isa<SIToFPInst>(OpI) || isa<UIToFPInst>(OpI)) &&
-      OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getScalarSizeInBits() <=
-      OpI->getType()->getFPMantissaWidth())
-    return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+  if (Instruction *I = FoldItoFPtoI(FI))
+    return I;
 
   return commonCastTransforms(FI);
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 399f1c3..f48d89b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,7 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -20,12 +22,20 @@
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+
 using namespace llvm;
 using namespace PatternMatch;
 
 #define DEBUG_TYPE "instcombine"
 
+// How many times is a select replaced by one of its operands?
+STATISTIC(NumSel, "Number of select opts");
+
+// Initialization Routines
+
 static ConstantInt *getOne(Constant *C) {
   return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
 }
@@ -1921,14 +1931,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
       DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
     Value *RHSOp = nullptr;
-    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+    if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
+      Value *RHSCIOp = RHSC->getOperand(0);
+      if (RHSCIOp->getType()->getPointerAddressSpace() ==
+          LHSCIOp->getType()->getPointerAddressSpace()) {
+        RHSOp = RHSC->getOperand(0);
+        // If the pointer types don't match, insert a bitcast.
+        if (LHSCIOp->getType() != RHSOp->getType())
+          RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+      }
+    } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1)))
       RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
-    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
-      RHSOp = RHSC->getOperand(0);
-      // If the pointer types don't match, insert a bitcast.
-      if (LHSCIOp->getType() != RHSOp->getType())
-        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
-    }
 
     if (RHSOp)
       return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
@@ -2446,6 +2459,122 @@ static bool swapMayExposeCSEOpportunities(const Value * Op0,
   return GlobalSwapBenefits > 0;
 }
 
+/// \brief Check that one use is in the same block as the definition and all
+/// other uses are in blocks dominated by a given block
+///
+/// \param DI Definition
+/// \param UI Use
+/// \param DB Block that must dominate all uses of \p DI outside
+///           the parent block
+/// \return true when \p UI is the only use of \p DI in the parent block
+/// and all other uses of \p DI are in blocks dominated by \p DB.
+///
+bool InstCombiner::dominatesAllUses(const Instruction *DI,
+                                    const Instruction *UI,
+                                    const BasicBlock *DB) const {
+  assert(DI && UI && "Instruction not defined\n");
+  // ignore incomplete definitions
+  if (!DI->getParent())
+    return false;
+  // DI and UI must be in the same block
+  if (DI->getParent() != UI->getParent())
+    return false;
+  // Protect from self-referencing blocks
+  if (DI->getParent() == DB)
+    return false;
+  // DominatorTree available?
+  if (!DT)
+    return false;
+  for (const User *U : DI->users()) {
+    auto *Usr = cast<Instruction>(U);
+    if (Usr != UI && !DT->dominates(DB, Usr->getParent()))
+      return false;
+  }
+  return true;
+}
+
+///
+/// true when the instruction sequence within a block is select-cmp-br.
+///
+static bool isChainSelectCmpBranch(const SelectInst *SI) {
+  const BasicBlock *BB = SI->getParent();
+  if (!BB)
+    return false;
+  auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
+  if (!BI || BI->getNumSuccessors() != 2)
+    return false;
+  auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
+    return false;
+  return true;
+}
+
+///
+/// \brief True when a select result is replaced by one of its operands
+/// in select-icmp sequence. This will eventually result in the elimination
+/// of the select.
+///
+/// \param SI    Select instruction
+/// \param Icmp  Compare instruction
+/// \param SIOpd Operand that replaces the select
+///
+/// Notes:
+/// - The replacement is global and requires dominator information
+/// - The caller is responsible for the actual replacement
+///
+/// Example:
+///
+/// entry:
+///  %4 = select i1 %3, %C* %0, %C* null
+///  %5 = icmp eq %C* %4, null
+///  br i1 %5, label %9, label %7
+/// ...
+/// ;