Diffstat (limited to 'lib/Transforms')
57 files changed, 2492 insertions, 1262 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 46480bd..56975ea 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -207,6 +207,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return nullptr; + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) @@ -227,12 +234,6 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { isSelfRecursive = true; } - // Don't promote arguments for variadic functions. Adding, removing, or - // changing non-pack parameters can change the classification of pack - // parameters. Frontends encode that classification at the call site in the - // IR, while in the callee the classification is determined dynamically based - // on the number of registers consumed so far. - if (F->isVarArg()) return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, @@ -674,8 +675,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back( - GetElementPtrInst::getIndexedType(I->getType(), SI->second)); + Params.push_back(GetElementPtrInst::getIndexedType( + cast<PointerType>(I->getType()->getScalarType())->getElementType(), + SI->second)); assert(Params.back()); } @@ -704,7 +706,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on subsequent argument // promotions of the same function. FunctionDIs.erase(DI); @@ -860,7 +862,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->replaceCallEdge(Call, New, NF_CGN); + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4431311..3be23d5 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -73,8 +73,8 @@ namespace { } std::string getDescription() const { - return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getName().str(); + return (Twine(IsArg ? 
"Argument #" : "Return value #") + utostr(Idx) + + " of function " + F->getName()).str(); } }; @@ -304,7 +304,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { auto DI = FunctionDIs.find(&Fn); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on non-varargs argument // eliminations of the same function. FunctionDIs.erase(DI); @@ -482,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U, return Result; } - if (ImmutableCallSite CS = V) { + if (auto CS = ImmutableCallSite(V)) { const Function *F = CS.getCalledFunction(); if (F) { // Used in a direct call. @@ -1092,7 +1092,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(NF); + DI->second->replaceFunction(NF); // Now that the old function is dead, delete it. F->eraseFromParent(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 20b41fb..b8c4f5d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -564,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. Value *NewPtr = NewGlobals[Val]; + Type *NewTy = NewGlobals[Val]->getType(); // Form a shorter GEP if needed. if (GEP->getNumOperands() > 3) { @@ -572,7 +573,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); + NewPtr = + ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); + NewTy = GetElementPtrInst::getIndexedType(NewTy, Idxs); } else { GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); SmallVector<Value*, 8> Idxs; @@ -721,8 +724,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { else break; if (Idxs.size() == GEPI->getNumOperands()-1) - Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, Idxs)); + Changed |= OptimizeAwayTrappingUsesOfValue( + GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -2338,7 +2341,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; - Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) Ptr = ConstantFoldConstantExpression(CE, DL, TLI); @@ -2402,8 +2405,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, i != e; ++i) GEPOps.push_back(getVal(*i)); InstResult = - ConstantExpr::getGetElementPtr(P, GEPOps, - cast<GEPOperator>(GEP)->isInBounds()); + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! 
Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index fe00d92..f3f8529 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -349,7 +349,8 @@ void LowerBitSets::allocateByteArrays() { Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( + ByteArrayConst->getType(), ByteArray, Idxs); // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the @@ -395,16 +396,17 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, } Constant *ByteArray = BAI->ByteArray; + Type *Ty = BAI->ByteArray->getValueType(); if (!LinkerSubsectionsViaSymbols && AvoidReuse) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. - ByteArray = GlobalAlias::create( - BAI->ByteArray->getType()->getElementType(), 0, - GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, + GlobalValue::PrivateLinkage, "bits_use", + ByteArray, M); } - Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset); Value *Byte = B.CreateLoad(ByteAddr); Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); @@ -546,8 +548,8 @@ void LowerBitSets::buildBitSetsFromGlobals( // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, I * 2)}; - Constant *CombinedGlobalElemPtr = - ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d28d563..502451b 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); + static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), cl::Hidden, cl::desc("Run the load combining pass")); @@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (RunFloat2Int) + MPM.add(createFloat2IntPass()); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. 
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 816978e..ad7c5a0 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -305,33 +305,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { SmallVector<Metadata *, 64> LiveSubprograms; DenseSet<const MDNode *> VisitedSet; - for (DICompileUnit DIC : F.compile_units()) { - assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); - + for (MDCompileUnit *DIC : F.compile_units()) { // Create our live subprogram list. - DIArray SPs = DIC.getSubprograms(); + MDSubprogramArray SPs = DIC->getSubprograms(); bool SubprogramChange = false; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram DISP(SPs.getElement(i)); - assert(DISP.Verify() && "DISP must verify as a DISubprogram."); + for (unsigned i = 0, e = SPs.size(); i != e; ++i) { + DISubprogram DISP = SPs[i]; // Make sure we visit each subprogram only once. if (!VisitedSet.insert(DISP).second) continue; // If the function referenced by DISP is not null, the function is live. - if (DISP.getFunction()) + if (DISP->getFunction()) LiveSubprograms.push_back(DISP); else SubprogramChange = true; } // Create our live global variable list. - DIArray GVs = DIC.getGlobalVariables(); + MDGlobalVariableArray GVs = DIC->getGlobalVariables(); bool GlobalVariableChange = false; - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); - assert(DIG.Verify() && "DIG must verify as DIGlobalVariable."); + for (unsigned i = 0, e = GVs.size(); i != e; ++i) { + DIGlobalVariable DIG = GVs[i]; // Make sure we only visit each global variable only once. if (!VisitedSet.insert(DIG).second) @@ -339,7 +335,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // If the global variable referenced by DIG is not null, the global // variable is live. - if (DIG.getGlobal()) + if (DIG->getVariable()) LiveGlobalVariables.push_back(DIG); else GlobalVariableChange = true; @@ -349,12 +345,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // subprogram list/global variable list with our new live subprogram/global // variable list. 
if (SubprogramChange) { - DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms))); + DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms)); Changed = true; } if (GlobalVariableChange) { - DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables))); + DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables)); Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 21243c2..56b6cd3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86insertps(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [3:0] - zero mask for each 32-bit lane + // [5:4] - select one 32-bit destination lane + // [7:6] - select one 32-bit source lane + + uint8_t Imm = CInt->getZExtValue(); + uint8_t ZMask = Imm & 0xf; + uint8_t DestLane = (Imm >> 4) & 0x3; + uint8_t SourceLane = (Imm >> 6) & 0x3; + + // If all zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (ZMask == 0xf) + return ZeroVector; + + // TODO: Model this case as two shuffles or a 'logical and' plus shuffle? + if (ZMask) + return nullptr; + + assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); + + // If we're not zeroing anything, this is a single shuffle. + // Replace the selected destination lane with the selected source lane. + // For all other lanes, pass the first source bits through. + int ShuffleMask[4] = { 0, 1, 2, 3 }; + ShuffleMask[DestLane] = SourceLane + 4; + + return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1), + ShuffleMask); + } + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. static Value *SimplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { - if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { VectorType *VecTy = cast<VectorType>(II.getType()); ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); @@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); - } - // FALL THROUGH uadd into sadd + + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: if (isa<Constant>(II->getArgOperand(0)) && !isa<Constant>(II->getArgOperand(1))) { + // Canonicalize constants into the RHS. 
Value *LHS = II->getArgOperand(0); II->setArgOperand(0, II->getArgOperand(1)); II->setArgOperand(1, LHS); return II; } + // fall through - // X + undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - - // We can strength reduce reduce this signed add into a regular add if we - // can prove that it will never overflow. - if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); - } - } - - break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - // undef - X -> undef - // X - undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { - // X - 0 -> {X, false} - if (ConstRHS->isZero()) { - return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); - } - } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); - } - } - break; - } - case Intrinsic::umul_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); - } // FALL THROUGH - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. 
- if (isa<Constant>(II->getArgOperand(0)) && - !isa<Constant>(II->getArgOperand(1))) { - Value *LHS = II->getArgOperand(0); - II->setArgOperand(0, II->getArgOperand(1)); - II->setArgOperand(1, LHS); - return II; - } - - // X * undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + OverflowCheckFlavor OCF = + IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID()); + assert(OCF != OCF_INVALID && "unexpected!"); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + Value *OperationResult = nullptr; + Constant *OverflowResult = nullptr; + if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1), + *II, OperationResult, OverflowResult)) + return CreateOverflowTuple(II, OperationResult, OverflowResult); - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); - } - } break; + } + case Intrinsic::minnum: case Intrinsic::maxnum: { Value *Arg0 = II->getArgOperand(0); @@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - + case Intrinsic::x86_sse41_insertps: + if (Value *V = SimplifyX86insertps(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse4a_insertqi: { // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top // ones undef diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index fe544c2..bd79a26 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1450,42 +1450,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { CI.setOperand(0, GEP->getOperand(0)); return &CI; } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - unsigned AS = GEP->getPointerAddressSpace(); - unsigned OffsetBits = DL.getPointerSizeInBits(AS); - APInt Offset(OffsetBits, 0); - BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); - if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { - // FIXME: This is insufficiently tested - just a no-crash test - // (test/Transforms/InstCombine/2007-05-14-Crash.ll) - // - // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = BCI->getOperand(0); - SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), - NewIndices)) { - // FIXME: This codepath is completely untested - could be unreachable - // for all I know. - // If we were able to index down into an element, create the GEP - // and bitcast the result. This eliminates one bitcast, potentially - // two. - Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() - ? 
Builder->CreateInBoundsGEP(OrigBase, NewIndices) - : Builder->CreateGEP( - OrigBase->getType()->getPointerElementType(), - OrigBase, NewIndices); - NGEP->takeName(GEP); - - if (isa<BitCastInst>(CI)) - return new BitCastInst(NGEP, CI.getType()); - assert(isa<PtrToIntInst>(CI)); - return new PtrToIntInst(NGEP, CI.getType()); - } - } } return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 803b50a..223bba0 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2109,33 +2109,112 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } -static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, - InstCombiner &IC) { - // Don't bother doing this transformation for pointers, don't do it for - // vectors. - if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; +bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, + Value *RHS, Instruction &OrigI, + Value *&Result, Constant *&Overflow) { + assert((!OrigI.isCommutative() || + !(isa<Constant>(LHS) && !isa<Constant>(RHS))) && + "call with a constant RHS if possible!"); + + auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) { + Result = OpResult; + Overflow = OverflowVal; + if (ReuseName) + Result->takeName(&OrigI); + return true; + }; - // If the add is a constant expr, then we don't bother transforming it. - Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); - if (!OrigAdd) return nullptr; + switch (OCF) { + case OCF_INVALID: + llvm_unreachable("bad overflow check kind!"); - Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + case OCF_UNSIGNED_ADD: { + OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + true); - // Put the new code above the original add, in case there are any uses of the - // add between the add and the compare. - InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(OrigAdd); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + } + // FALL THROUGH uadd into sadd + case OCF_SIGNED_ADD: { + // X + undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(RHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X + 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(LHS, Builder->getFalse(), false); + + // We can strength reduce this signed add into a regular add if we can prove + // that it will never overflow. 
+ if (OCF == OCF_SIGNED_ADD) + if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + true); + } - Module *M = I.getParent()->getParent()->getParent(); - Type *Ty = LHS->getType(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); - CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); - Value *Add = Builder->CreateExtractValue(Call, 0); + case OCF_UNSIGNED_SUB: + case OCF_SIGNED_SUB: { + // undef - X -> undef + // X - undef -> undef + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X - 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(UndefValue::get(LHS->getType()), Builder->getFalse(), + false); + + if (OCF == OCF_SIGNED_SUB) { + if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + true); + } else { + if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + true); + } + break; + } - IC.ReplaceInstUsesWith(*OrigAdd, Add); + case OCF_UNSIGNED_MUL: { + OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + true); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + } // FALL THROUGH + case OCF_SIGNED_MUL: + // X * undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(RHS)) { + // X * 0 -> {0, false} + if (RHSI->isZero()) + return SetResult(Constant::getNullValue(RHS->getType()), + Builder->getFalse(), false); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) + return SetResult(LHS, Builder->getFalse(), false); + } - // The original icmp gets replaced with the overflow value. - return ExtractValueInst::Create(Call, 1, "uadd.overflow"); + if (OCF == OCF_SIGNED_MUL) + if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + true); + } + + return false; } /// \brief Recognize and process idiom involving test for multiplication @@ -3432,21 +3511,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); } - // (a+b) <u a --> llvm.uadd.with.overflow. - // (a+b) <u b --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && - (Op1 == A || Op1 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) - return R; - - // a >u (a+b) --> llvm.uadd.with.overflow. - // b >u (a+b) --> llvm.uadd.with.overflow. 
- if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op1, m_Add(m_Value(A), m_Value(B))) && - (Op0 == A || Op0 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) - return R; + Instruction *AddI = nullptr; + if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B), + m_Instruction(AddI))) && + isa<IntegerType>(A->getType())) { + Value *Result; + Constant *Overflow; + if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result, + Overflow)) { + ReplaceInstUsesWith(*AddI, Result); + return ReplaceInstUsesWith(I, Overflow); + } + } // (zext a) * (zext b) --> llvm.umul.with.overflow. if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index fb2321d..d0caf34 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -110,6 +110,41 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } + +/// \brief Specific patterns of overflow check idioms that we match. +enum OverflowCheckFlavor { + OCF_UNSIGNED_ADD, + OCF_SIGNED_ADD, + OCF_UNSIGNED_SUB, + OCF_SIGNED_SUB, + OCF_UNSIGNED_MUL, + OCF_SIGNED_MUL, + + OCF_INVALID +}; + +/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op +/// intrinsic. +static inline OverflowCheckFlavor +IntrinsicIDToOverflowCheckFlavor(unsigned ID) { + switch (ID) { + default: + return OCF_INVALID; + case Intrinsic::uadd_with_overflow: + return OCF_UNSIGNED_ADD; + case Intrinsic::sadd_with_overflow: + return OCF_SIGNED_ADD; + case Intrinsic::usub_with_overflow: + return OCF_UNSIGNED_SUB; + case Intrinsic::ssub_with_overflow: + return OCF_SIGNED_SUB; + case Intrinsic::umul_with_overflow: + return OCF_UNSIGNED_MUL; + case Intrinsic::smul_with_overflow: + return OCF_SIGNED_MUL; + } +} + /// \brief An IRBuilder inserter that adds new instructions to the instcombine /// worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter @@ -316,7 +351,7 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; - Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -329,6 +364,17 @@ private: bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); + /// \brief Try to optimize a sequence of instructions checking if an operation + /// on LHS and RHS overflows. + /// + /// If a simplification is possible, stores the simplified result of the + /// operation in OperationResult and result of the overflow check in + /// OverflowResult, and return true. If no simplification is possible, + /// returns false. + bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS, + Instruction &CtxI, Value *&OperationResult, + Constant *&OverflowResult); + Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI); bool transformConstExprCastCall(CallSite CS); @@ -391,14 +437,10 @@ public: } /// Creates a result tuple for an overflow intrinsic \p II with a given - /// \p Result and a constant \p Overflow value. If \p ReUseName is true the - /// \p Result's name is taken from \p II. + /// \p Result and a constant \p Overflow value. 
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result, - bool Overflow, bool ReUseName = true) { - if (ReUseName) - Result->takeName(II); - Constant *V[] = {UndefValue::get(Result->getType()), - Overflow ? Builder->getTrue() : Builder->getFalse()}; + Constant *Overflow) { + Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Result, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6b0f268..d8a559c 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -84,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(&U)) @@ -611,8 +611,10 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, return false; SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); - Type *AllocTy = - GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); + Type *AllocTy = GetElementPtrInst::getIndexedType( + cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType()) + ->getElementType(), + Ops); if (!AllocTy || !AllocTy->isSized()) return false; const DataLayout &DL = IC.getDataLayout(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b6beb65..24446c8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -987,8 +987,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned BegIdx = Mask.front(); VectorType *SrcTy = cast<VectorType>(V->getType()); unsigned VecBitWidth = SrcTy->getBitWidth(); - unsigned SrcElemBitWidth = - SrcTy->getElementType()->getPrimitiveSizeInBits(); + unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); assert(SrcElemBitWidth && "vector elements must have a bitwidth"); unsigned SrcNumElems = SrcTy->getNumElements(); SmallVector<BitCastInst *, 8> BCs; @@ -1000,7 +999,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { BCs.push_back(BC); for (BitCastInst *BC : BCs) { Type *TgtTy = BC->getDestTy(); - unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits(); + unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); if (!TgtElemBitWidth) continue; unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 90551e4..3b46156 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -869,11 +869,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// whether or not there is a sequence of GEP indices into the pointed type that /// will land us at the specified offset. If so, fill them into NewIndices and /// return the resultant element type, otherwise return null. 
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices) { - assert(PtrTy->isPtrOrPtrVectorTy()); - - Type *Ty = PtrTy->getPointerElementType(); + Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; @@ -1611,12 +1609,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); - Value *NewGEP = - GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = GEP.isInBounds() + ? Builder->CreateInBoundsGEP( + nullptr, StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1634,9 +1631,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1669,10 +1666,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, NewIdx, GEP.getName()); + : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1710,9 +1707,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Constant::getNullValue(DL.getIntPtrType(GEP.getType())), NewIdx}; - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); + Value *NewGEP = GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP( + SrcElTy, StrippedPtr, Off, GEP.getName()) + : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1774,9 +1773,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP. SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { - Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); + Value *NGEP = + GEP.isInBounds() + ? 
Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder->CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2268,7 +2268,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder->SetInsertPoint(L->getParent(), L); - Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + Value *GEP = Builder->CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use ReplaceInstUsesWith(). return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -2725,7 +2726,7 @@ bool InstCombiner::run() { DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); - if (!I->getDebugLoc().isUnknown()) + if (I->getDebugLoc()) Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 978c857..8d6d3ce 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -401,12 +401,12 @@ struct AddressSanitizer : public FunctionPass { return SizeInBytes; } /// Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const; + bool isInterestingAlloca(AllocaInst &AI); /// If it is an interesting memory access, return the PointerOperand /// and set IsWrite/Alignment. Otherwise return nullptr. Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const; + unsigned *Alignment); void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -458,6 +458,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; GlobalsMetadata GlobalsMD; + DenseMap<AllocaInst *, bool> ProcessedAllocas; friend struct FunctionStackPoisoner; }; @@ -804,13 +805,21 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { } /// Check if we want (and can) handle this alloca. -bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0 && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { + auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); + + if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) + return PreviouslySeenAllocaInfo->getSecond(); + + bool IsInteresting = (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. 
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); + + ProcessedAllocas[&AI] = IsInteresting; + return IsInteresting; } /// If I is an interesting memory access, return the PointerOperand @@ -818,7 +827,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const { + unsigned *Alignment) { // Skip memory accesses inserted by another instrumentation. if (I->getMetadata("nosanitize")) return nullptr; @@ -959,18 +968,6 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, UseCalls, Exp); } -// Validate the result of Module::getOrInsertFunction called for an interface -// function of AddressSanitizer. If the instrumented module defines a function -// with the same name, their prototypes must match, otherwise -// getOrInsertFunction returns a bitcast. -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); - FuncOrBitcast->dump(); - report_fatal_error( - "trying to redefine an AddressSanitizer " - "interface function"); -} - Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, @@ -1056,7 +1053,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // path is rarely taken. This seems to be the case for SPEC benchmarks. TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); - assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); + assert(cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); @@ -1219,17 +1216,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. - AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); - AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. 
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterGlobals = checkInterfaceFunction( + AsanUnregisterGlobals = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); @@ -1317,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Indices2[1] = IRB.getInt32(0); G->replaceAllUsesWith( - ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true)); + ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); NewGlobal->takeName(G); G->eraseFromParent(); @@ -1399,44 +1396,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string ExpStr = Exp ? "exp_" : ""; const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; AsanErrorCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + TypeStr + "_n", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); } } } - AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemset = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); - AsanHandleNoReturnFunc = checkInterfaceFunction( + AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); - AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrSubFunction = 
checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -1461,7 +1458,7 @@ bool AddressSanitizer::doInitialization(Module &M) { BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); // call __asan_init in the module ctor. IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); - AsanInitFunction = checkInterfaceFunction( + AsanInitFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr)); AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); @@ -1613,16 +1610,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { std::string Suffix = itostr(i); - AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr)); - AsanStackFreeFunc[i] = checkInterfaceFunction( + AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, + IntptrTy, nullptr)); + AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } - AsanPoisonStackMemoryFunc = checkInterfaceFunction( + AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanUnpoisonStackMemoryFunc = checkInterfaceFunction( + AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } @@ -1757,9 +1755,11 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca in presence of inline asm: too often it - // makes assumptions on which registers are available. + // Don't do dynamic alloca in presence of inline asm: too often it makes + // assumptions on which registers are available. Don't do stack malloc in the + // presence of inline asm on 32-bit platforms for the same reason. bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; + DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b3925ee..06d5aed 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -753,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // Patch the pointer to LLVM function in debug info descriptor. 
auto DI = FunctionDIs.find(&F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(&F); + DI->second->replaceFunction(&F); UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -1075,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } case 2: { IRBuilder<> IRB(Pos); - Value *ShadowAddr1 = - IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); + Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr, + ConstantInt::get(DFS.IntptrTy, 1)); return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign), IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos); } @@ -1127,7 +1127,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); DT.addNewBlock(NextBB, LastBr->getParent()); IRBuilder<> NextIRB(NextBB); - WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); + WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, + ConstantInt::get(DFS.IntptrTy, 1)); Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign); ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); LastBr->setSuccessor(0, NextBB); @@ -1213,7 +1214,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowVecAddr = IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); do { - Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); + Value *CurShadowVecAddr = + IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); Size -= ShadowVecSize; ++Offset; @@ -1221,7 +1223,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Offset *= ShadowVecSize; } while (Size > 0) { - Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); + Value *CurShadowAddr = + IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset); IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign); --Size; ++Offset; @@ -1469,17 +1472,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Args.push_back(DFSF.getShadow(*i)); if (FT->isVarArg()) { - auto LabelVAAlloca = - new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy, - CS.arg_size() - FT->getNumParams()), - "labelva", DFSF.F->getEntryBlock().begin()); + auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, + CS.arg_size() - FT->getNumParams()); + auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", + DFSF.F->getEntryBlock().begin()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { - auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n); + auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr); } - Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0)); + Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); } if (!FT->getReturnType()->isVoidTy()) { @@ -1565,10 +1568,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) { ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); - Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); + Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { - IRB.CreateStore(DFSF.getShadow(*i), - IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); + IRB.CreateStore( + DFSF.getShadow(*i), + IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n)); Args.push_back(*i); } } 
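The DataFlowSanitizer hunks above apply the same migration to the varargs shadow array: the alloca's array type is named once (LabelVATy, VarArgArrayTy) and then passed to the typed CreateStructGEP/CreateConstGEP2_32 overloads. A minimal sketch under the same assumptions — the helper name packShadowArgs is invented, and DFSan's real code iterates the call site's argument range rather than taking an ArrayRef:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Store one shadow value per variadic argument (for DFSan, ShadowTy is
// the 16-bit label type) into a stack array and return a pointer to its
// first element, naming ArrTy at every GEP as the new API requires.
static Value *packShadowArgs(IRBuilder<> &IRB, Type *ShadowTy,
                             ArrayRef<Value *> Shadows,
                             Instruction *InsertBefore) {
  auto *ArrTy = ArrayType::get(ShadowTy, Shadows.size());
  auto *Arr = new AllocaInst(ArrTy, "shadowva", InsertBefore);
  for (unsigned N = 0; N != Shadows.size(); ++N)
    IRB.CreateStore(Shadows[N], IRB.CreateConstGEP2_32(ArrTy, Arr, 0, N));
  return IRB.CreateConstGEP2_32(ArrTy, Arr, 0, 0);
}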
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a793e69..368a81d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -149,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { return new GCOVProfiler(Options); } -static StringRef getFunctionName(DISubprogram SP) { - if (!SP.getLinkageName().empty()) - return SP.getLinkageName(); - return SP.getName(); +static StringRef getFunctionName(MDSubprogram *SP) { + if (!SP->getLinkageName().empty()) + return SP->getLinkageName(); + return SP->getName(); } namespace { @@ -163,7 +163,7 @@ namespace { static const char *const BlockTag; static const char *const EdgeTag; - GCOVRecord() {} + GCOVRecord() = default; void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); @@ -315,7 +315,7 @@ namespace { ReturnBlock(1, os) { this->os = os; - Function *F = SP.getFunction(); + Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -330,7 +330,7 @@ namespace { std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); - FNLOS << getFunctionName(SP) << SP.getLineNumber(); + FNLOS << getFunctionName(SP) << SP->getLine(); FNLOS.flush(); FuncChecksum = hash_value(FunctionNameAndLine); } @@ -366,7 +366,7 @@ namespace { void writeOut() { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + - 1 + lengthOfGCOVString(SP.getFilename()) + 1; + 1 + lengthOfGCOVString(SP->getFilename()) + 1; if (UseCfgChecksum) ++BlockLen; write(BlockLen); @@ -375,8 +375,8 @@ namespace { if (UseCfgChecksum) write(CfgChecksum); writeGCOVString(getFunctionName(SP)); - writeGCOVString(SP.getFilename()); - write(SP.getLineNumber()); + writeGCOVString(SP->getFilename()); + write(SP->getLine()); // Emit count of blocks. writeBytes(BlockTag, 4); @@ -437,12 +437,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { } } - SmallString<128> Filename = CU.getFilename(); + SmallString<128> Filename = CU->getFilename(); sys::path::replace_extension(Filename, NewStem); StringRef FName = sys::path::filename(Filename); SmallString<128> CurPath; if (sys::fs::current_path(CurPath)) return FName; - sys::path::append(CurPath, FName.str()); + sys::path::append(CurPath, FName); return CurPath.str(); } @@ -466,7 +466,8 @@ static bool functionHasLines(Function *F) { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; @@ -486,21 +487,14 @@ void GCOVProfiler::emitProfileNotes() { // this pass over the original .o's as they're produced, or run it after // LTO, we'll generate the same .gcno files. 
- DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::error_code EC; raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None); std::string EdgeDestinations; - DIArray SPs = CU.getSubprograms(); unsigned FunctionIdent = 0; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; @@ -536,16 +530,18 @@ void GCOVProfiler::emitProfileNotes() { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; if (Line == Loc.getLine()) continue; Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + if (SP != getDISubprogram(Loc.getScope())) + continue; - GCOVLines &Lines = Block.getFile(SP.getFilename()); + GCOVLines &Lines = Block.getFile(SP->getFilename()); Lines.addLine(Loc.getLine()); } } @@ -574,16 +570,10 @@ bool GCOVProfiler::emitProfileArcs() { bool Result = false; bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); - DIArray SPs = CU.getSubprograms(); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -603,7 +593,7 @@ bool GCOVProfiler::emitProfileArcs() { GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + CountersBySP.push_back(std::make_pair(Counters, SP)); UniqueVector<BasicBlock *> ComplexEdgePreds; UniqueVector<BasicBlock *> ComplexEdgeSuccs; @@ -628,7 +618,8 @@ bool GCOVProfiler::emitProfileArcs() { SmallVector<Value *, 2> Idx; Idx.push_back(Builder.getInt64(0)); Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(), + Counters, Idx); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); @@ -855,7 +846,7 @@ Function *GCOVProfiler::insertCounterWriteout( NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); uint32_t CfgChecksum = FileChecksums.empty() ? 
0 : FileChecksums[i]; Builder.CreateCall3(StartFile, @@ -863,7 +854,7 @@ Function *GCOVProfiler::insertCounterWriteout( Builder.CreateGlobalStringPtr(ReversedVersion), Builder.getInt32(CfgChecksum)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { - DISubprogram SP(CountersBySP[j].second); + auto *SP = cast_or_null<MDSubprogram>(CountersBySP[j].second); uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); Builder.CreateCall5( EmitFunction, Builder.getInt32(j), @@ -922,7 +913,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); Arg = std::next(Fn->arg_begin()); Arg->setName("counters"); - Value *GEP = Builder.CreateGEP(Arg, ZExtPred); + Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); Cond = Builder.CreateICmpEQ(Counter, Constant::getNullValue( diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c2aa1e2..2b35066 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -623,8 +623,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0)); for (unsigned i = 0; i < Size / IntptrSize; ++i) { - Value *Ptr = - i ? IRB.CreateConstGEP1_32(IntptrOriginPtr, i) : IntptrOriginPtr; + Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) + : IntptrOriginPtr; IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); Ofs += IntptrSize / kOriginSize; CurrentAlignment = IntptrAlignment; @@ -632,7 +632,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { - Value *GEP = i ? IRB.CreateConstGEP1_32(OriginPtr, i) : OriginPtr; + Value *GEP = + i ? 
IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr; IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); CurrentAlignment = kMinOriginAlignment; } @@ -2843,7 +2844,8 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr = MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); - Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset); + Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, + AMD64FpEndOffset); IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); } } diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 289675e..662513d 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -140,16 +140,6 @@ class SanitizerCoverageModule : public ModulePass { } // namespace -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - std::string Err; - raw_string_ostream Stream(Err); - Stream << "SanitizerCoverage interface function redefined: " - << *FuncOrBitcast; - report_fatal_error(Err); -} - bool SanitizerCoverageModule::runOnModule(Module &M) { if (!CoverageLevel) return false; C = &(M.getContext()); @@ -167,16 +157,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc)); appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority); - SanCovFunction = checkInterfaceFunction( + SanCovFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr)); - SanCovWithCheckFunction = checkInterfaceFunction( + SanCovWithCheckFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); - SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); - SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); - - SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + SanCovIndirCallFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); + SanCovTraceCmpFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + + SanCovModuleInit = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy, nullptr)); SanCovModuleInit->setLinkage(Function::ExternalLinkage); @@ -186,9 +178,9 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { /*hasSideEffects=*/true); if (ClExperimentalTracing) { - SanCovTraceEnter = checkInterfaceFunction( + SanCovTraceEnter = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkInterfaceFunction( + SanCovTraceBB = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); } @@ -316,7 +308,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); - if (dyn_cast<InlineAsm>(Callee)) continue; + if (isa<InlineAsm>(Callee)) continue; GlobalVariable 
*CalleeCache = new GlobalVariable( *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); @@ -366,8 +358,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } bool IsEntryBB = &BB == &F.getEntryBlock(); - DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() - ? IP->getDebugLoc().getFnDebugLoc(*C) + DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc() + ? IP->getDebugLoc().getFnDebugLoc() : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c3ba722..aa8ee5a 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -129,54 +129,48 @@ FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - FuncOrBitcast->dump(); - report_fatal_error("ThreadSanitizer interface function redefined"); -} - void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); // Initialize the callbacks. - TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( + TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_func_exit", IRB.getVoidTy(), nullptr)); + TsanFuncExit = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const size_t ByteSize = 1 << i; const size_t BitSize = ByteSize * 8; SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); - TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); - TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + itostr(ByteSize)); - TsanUnalignedRead[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedRead[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + itostr(ByteSize)); - TsanUnalignedWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedWrite[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + "_load"); - TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); + TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); SmallString<32> 
AtomicStoreName("__tsan_atomic" + itostr(BitSize) + "_store"); - TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, - nullptr)); + TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { @@ -199,34 +193,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); - TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( - RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); + TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + "_compare_exchange_val"); - TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); } - TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), nullptr)); - TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + TsanVptrUpdate = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); + TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr)); - TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr)); - MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( - "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); - MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( - "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IntptrTy, nullptr)); - MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( - "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), - IntptrTy, nullptr)); + MemmoveFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemcpyFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemsetFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt32Ty(), IntptrTy, nullptr)); } bool ThreadSanitizer::doInitialization(Module &M) { diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 87de33b..d4fef10 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -54,8 +54,6 @@ public: RetainAutorelease(nullptr), RetainAutoreleaseRV(nullptr) { } - ~ARCRuntimeEntryPoints() { } - void init(Module *M) { TheModule = M; AutoreleaseRV = nullptr; diff --git 
a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index b197c97..4edd029 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -45,7 +45,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, default: break; } - ImmutableCallSite CS = static_cast<const Value *>(Inst); + ImmutableCallSite CS(Inst); assert(CS && "Only calls can alter reference counts!"); // See if AliasAnalysis can help us with the call. @@ -99,7 +99,7 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, // of any other dynamic reference-counted pointers. if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + } else if (auto CS = ImmutableCallSite(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5aa2b97..8918909 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass { initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTreeWrapperPass>(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index cf30f39..16f2ead 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \ DeadStoreElimination.cpp \ EarlyCSE.cpp \ FlattenCFGPass.cpp \ + Float2Int.cpp \ GVN.cpp \ IndVarSimplify.cpp \ InductiveRangeCheckElimination.cpp \ @@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \ LowerExpectIntrinsic.cpp \ MemCpyOptimizer.cpp \ MergedLoadStoreMotion.cpp \ + NaryReassociate.cpp \ PartiallyInlineLibCalls.cpp \ PlaceSafepoints.cpp \ Reassociate.cpp \ diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d12fdb7..c29fcc3 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts DeadStoreElimination.cpp EarlyCSE.cpp FlattenCFGPass.cpp + Float2Int.cpp GVN.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp @@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp + NaryReassociate.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp Reassociate.cpp diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb8981b..01952cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { return true; } } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { if (Function *F = CS.getCalledFunction()) { if (TLI && TLI->has(LibFunc::strcpy) && F->getName() == TLI->getName(LibFunc::strcpy)) { @@ -262,7 +262,7 @@ 
static bool isRemovable(Instruction *I) { } } - if (CallSite CS = I) + if (auto CS = CallSite(I)) return CS.getInstruction()->use_empty(); return false; @@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) { } } - CallSite CS = I; + CallSite CS(I); // All the supported functions so far happen to have dest as their first // argument. return CS.getArgument(0); @@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (CallSite CS = cast<Value>(BBI)) { + if (auto CS = CallSite(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. if (isAllocLikeFn(BBI, TLI)) diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp new file mode 100644 index 0000000..c931422 --- /dev/null +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -0,0 +1,540 @@ +//===- Float2Int.cpp - Demote floating point ops to work on integers ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Float2Int pass, which aims to demote floating +// point operations to work on integers, where that is losslessly possible. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "float2int" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <deque> +#include <functional> // For std::function +using namespace llvm; + +// The algorithm is simple. Start at instructions that convert from the +// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use +// graph, using an equivalence data structure to unify graphs that interfere. +// +// Mappable instructions are those with an integer corollary that, given +// integer domain inputs, produce an integer output; fadd, for example. +// +// If a non-mappable instruction is seen, this entire def-use graph is marked +// as non-transformable. If we see an instruction that converts from the +// integer domain to FP domain (uitofp, sitofp), we terminate our walk.
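+
+// As a sketch of the intended effect (hypothetical IR, not taken from this
+// patch's tests), a graph such as
+//
+//   %f = uitofp i8 %x to float
+//   %a = fadd float %f, 1.0
+//   %r = fptoui float %a to i32
+//
+// stays within a small, known integer range and can be demoted to
+//
+//   %x.ext = zext i8 %x to i32
+//   %a.int = add i32 %x.ext, 1
+//
+// with uses of %r replaced by %a.int directly.
+
+/// The largest integer type worth dealing with.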
+static cl::opt<unsigned> +MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden, + cl::desc("Max integer bitwidth to consider in float2int " + "(default=64)")); + +namespace { + struct Float2Int : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + Float2Int() : FunctionPass(ID) { + initializeFloat2IntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + + void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots); + ConstantRange seen(Instruction *I, ConstantRange R); + ConstantRange badRange(); + ConstantRange unknownRange(); + ConstantRange validateRange(ConstantRange R); + void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots); + void walkForwards(); + bool validateAndTransform(); + Value *convert(Instruction *I, Type *ToTy); + void cleanup(); + + MapVector<Instruction*, ConstantRange > SeenInsts; + SmallPtrSet<Instruction*,8> Roots; + EquivalenceClasses<Instruction*> ECs; + MapVector<Instruction*, Value*> ConvertedInsts; + LLVMContext *Ctx; + }; +} + +char Float2Int::ID = 0; +INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) + +// Given an FCmp predicate, return a matching ICmp predicate if one +// exists, otherwise return BAD_ICMP_PREDICATE. +static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) { + switch (P) { + case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: + return CmpInst::ICMP_EQ; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + return CmpInst::ICMP_SGT; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: + return CmpInst::ICMP_SGE; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + return CmpInst::ICMP_SLT; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: + return CmpInst::ICMP_SLE; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: + return CmpInst::ICMP_NE; + default: + return CmpInst::BAD_ICMP_PREDICATE; + } +} + +// Given a floating point binary operator, return the matching +// integer version. +static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { + switch (Opcode) { + default: llvm_unreachable("Unhandled opcode!"); + case Instruction::FAdd: return Instruction::Add; + case Instruction::FSub: return Instruction::Sub; + case Instruction::FMul: return Instruction::Mul; + } +} + +// Find the roots - instructions that convert from the FP domain to +// the integer domain. +void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) { + for (auto &I : inst_range(F)) { + switch (I.getOpcode()) { + default: break; + case Instruction::FPToUI: + case Instruction::FPToSI: + Roots.insert(&I); + break; + case Instruction::FCmp: + if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) != + CmpInst::BAD_ICMP_PREDICATE) + Roots.insert(&I); + break; + } + } +} + +// Helper - mark I as having been traversed, having range R. +ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) { + DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n"); + if (SeenInsts.find(I) != SeenInsts.end()) + SeenInsts.find(I)->second = R; + else + SeenInsts.insert(std::make_pair(I, R)); + return R; +} + +// Helper - get a range representing a poison value.
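+// (With the default MaxIntegerBW of 64 these are 65-bit ranges: badRange()
+// returns the full set, used as an "overdefined"/poison marker, while
+// unknownRange() returns the empty set, meaning "not yet computed".)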
+ConstantRange Float2Int::badRange() { + return ConstantRange(MaxIntegerBW + 1, true); +} +ConstantRange Float2Int::unknownRange() { + return ConstantRange(MaxIntegerBW + 1, false); +} +ConstantRange Float2Int::validateRange(ConstantRange R) { + if (R.getBitWidth() > MaxIntegerBW + 1) + return badRange(); + return R; +} + +// The most obvious way to structure the search is a depth-first, eager +// search from each root. However, that requires direct recursion and so +// can only handle small instruction sequences. Instead, we split the search +// up into two phases: +// - walkBackwards: A breadth-first walk of the use-def graph starting from +// the roots. Populate "SeenInsts" with interesting +// instructions and poison values if they're obvious and +// cheap to compute. Calculate the equivalence set structure +// while we're here too. +// - walkForwards: Iterate over SeenInsts in reverse order, so we visit +// defs before their uses. Calculate the real range info. + +// Breadth-first walk of the use-def graph; determine the set of nodes +// we care about and eagerly determine if some of them are poisonous. +void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) { + std::deque<Instruction*> Worklist(Roots.begin(), Roots.end()); + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + Worklist.pop_back(); + + if (SeenInsts.find(I) != SeenInsts.end()) + // Seen already. + continue; + + switch (I->getOpcode()) { + // FIXME: Handle select and phi nodes. + default: + // Path terminated uncleanly. + seen(I, badRange()); + break; + + case Instruction::UIToFP: { + // Path terminated cleanly. + unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1); + APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1); + seen(I, validateRange(ConstantRange(Min, Max))); + continue; + } + + case Instruction::SIToFP: { + // Path terminated cleanly. + unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits(); + APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1); + APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1); + seen(I, validateRange(ConstantRange(SMin, SMax))); + continue; + } + + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FCmp: + seen(I, unknownRange()); + break; + } + + for (Value *O : I->operands()) { + if (Instruction *OI = dyn_cast<Instruction>(O)) { + // Unify def-use chains if they interfere. + ECs.unionSets(I, OI); + if (SeenInsts.find(I)->second != badRange()) + Worklist.push_back(OI); + } else if (!isa<ConstantFP>(O)) { + // Not an instruction or ConstantFP? We can't do anything. + seen(I, badRange()); + } + } + } +} + +// Walk forwards down the list of seen instructions, so we visit defs before +// uses. +void Float2Int::walkForwards() { + for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) { + if (It->second != unknownRange()) + continue; + + Instruction *I = It->first; + std::function<ConstantRange(ArrayRef<ConstantRange>)> Op; + switch (I->getOpcode()) { + // FIXME: Handle select and phi nodes.
+ default: + case Instruction::UIToFP: + case Instruction::SIToFP: + llvm_unreachable("Should have been handled in walkBackwards!"); + + case Instruction::FAdd: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FAdd is a binary operator!"); + return Ops[0].add(Ops[1]); + }; + break; + + case Instruction::FSub: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FSub is a binary operator!"); + return Ops[0].sub(Ops[1]); + }; + break; + + case Instruction::FMul: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FMul is a binary operator!"); + return Ops[0].multiply(Ops[1]); + }; + break; + + // + // Root-only instructions - we'll only see these if they're the + // first node in a walk. + // + case Instruction::FPToUI: + case Instruction::FPToSI: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!"); + return Ops[0]; + }; + break; + + case Instruction::FCmp: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FCmp is a binary operator!"); + return Ops[0].unionWith(Ops[1]); + }; + break; + } + + bool Abort = false; + SmallVector<ConstantRange,4> OpRanges; + for (Value *O : I->operands()) { + if (Instruction *OI = dyn_cast<Instruction>(O)) { + assert(SeenInsts.find(OI) != SeenInsts.end() && + "def not seen before use!"); + OpRanges.push_back(SeenInsts.find(OI)->second); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) { + // Work out if the floating point number can be losslessly represented + // as an integer. + // APFloat::convertToInteger(&Exact) purports to do what we want, but + // its exactness test is stricter than we need. For example, negative + // zero can never be exactly converted to an integer. + // + // Instead, we ask APFloat to round itself to an integral value - this + // preserves sign-of-zero - then compare the result with the original. + // + APFloat F = CF->getValueAPF(); + + // First, weed out obviously incorrect values. Non-finite numbers + // can't be represented and neither can negative zero, unless + // we're in fast math mode. + if (!F.isFinite() || + (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) && + !I->hasNoSignedZeros())) { + seen(I, badRange()); + Abort = true; + break; + } + + APFloat NewF = F; + auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven); + if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) { + seen(I, badRange()); + Abort = true; + break; + } + // OK, it's representable. Now get it. + APSInt Int(MaxIntegerBW+1, false); + bool Exact; + CF->getValueAPF().convertToInteger(Int, + APFloat::rmNearestTiesToEven, + &Exact); + OpRanges.push_back(ConstantRange(Int)); + } else { + llvm_unreachable("Should have already marked this as badRange!"); + } + } + + // Reduce the operands' ranges to a single range and return. + if (!Abort) + seen(I, Op(OpRanges)); + } +}
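+
+// For instance (hypothetical values), with the default MaxIntegerBW of 64 the
+// ranges are tracked at 65 bits: a uitofp from i8 seeds a range covering
+// 0..255, an fadd of two such values widens this to cover 0..510, and
+// validateAndTransform below concludes that the whole graph fits in an i32.
+
+// If there is a valid transform to be done, do it. +bool Float2Int::validateAndTransform() { + bool MadeChange = false; + + // Iterate over every disjoint partition of the def-use graph. + for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) { + ConstantRange R(MaxIntegerBW + 1, false); + bool Fail = false; + Type *ConvertedToTy = nullptr; + + // For every member of the partition, union all the ranges together.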
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) { + Instruction *I = *MI; + auto SeenI = SeenInsts.find(I); + if (SeenI == SeenInsts.end()) + continue; + + R = R.unionWith(SeenI->second); + // We need to ensure I has no users that have not been seen. + // If it does, transformation would be illegal. + // + // Don't count the roots, as they terminate the graphs. + if (Roots.count(I) == 0) { + // Set the type of the conversion while we're here. + if (!ConvertedToTy) + ConvertedToTy = I->getType(); + for (User *U : I->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + if (!UI || SeenInsts.find(UI) == SeenInsts.end()) { + DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n"); + Fail = true; + break; + } + } + } + if (Fail) + break; + } + + // If the set was empty, or we failed, or the range is poisonous, + // bail out. + if (ECs.member_begin(It) == ECs.member_end() || Fail || + R.isFullSet() || R.isSignWrappedSet()) + continue; + assert(ConvertedToTy && "Must have set ConvertedToTy by this point!"); + + // The number of bits required is the maximum of the upper and + // lower limits, plus one so it can be signed. + unsigned MinBW = std::max(R.getLower().getMinSignedBits(), + R.getUpper().getMinSignedBits()) + 1; + DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n"); + + // If we've run off the realm of exactly representable integers, + // the floating point result will differ from an integer approximation. + + // Do we need more bits than are in the mantissa of the type we converted + // to? semanticsPrecision returns the number of mantissa bits plus one + // for the sign bit. + unsigned MaxRepresentableBits + = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; + if (MinBW > MaxRepresentableBits) { + DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n"); + continue; + } + if (MinBW > 64) { + DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n"); + continue; + } + + // OK, R is known to be representable. Now pick a type for it. + // FIXME: Pick the smallest legal type that will fit. + Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx); + + for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) + convert(*MI, Ty); + MadeChange = true; + } + + return MadeChange; +} + +Value *Float2Int::convert(Instruction *I, Type *ToTy) { + if (ConvertedInsts.find(I) != ConvertedInsts.end()) + // Already converted this instruction. + return ConvertedInsts[I]; + + SmallVector<Value*,4> NewOperands; + for (Value *V : I->operands()) { + // Don't recurse if we're an instruction that terminates the path. + if (I->getOpcode() == Instruction::UIToFP || + I->getOpcode() == Instruction::SIToFP) { + NewOperands.push_back(V); + } else if (Instruction *VI = dyn_cast<Instruction>(V)) { + NewOperands.push_back(convert(VI, ToTy)); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false); + bool Exact; + CF->getValueAPF().convertToInteger(Val, + APFloat::rmNearestTiesToEven, + &Exact); + NewOperands.push_back(ConstantInt::get(ToTy, Val)); + } else { + llvm_unreachable("Unhandled operand type?"); + } + } + + // Now create a new instruction.
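+  // (Interior nodes are recreated at ToTy; the root conversions handled
+  // below keep their original result types, so the types seen outside the
+  // transformed graph do not change.)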
+ IRBuilder<> IRB(I); + Value *NewV = nullptr; + switch (I->getOpcode()) { + default: llvm_unreachable("Unhandled instruction!"); + + case Instruction::FPToUI: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FPToSI: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FCmp: { + CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate()); + assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!"); + NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName()); + break; + } + + case Instruction::UIToFP: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::SIToFP: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()), + NewOperands[0], NewOperands[1], + I->getName()); + break; + } + + // If we're a root instruction, RAUW. + if (Roots.count(I)) + I->replaceAllUsesWith(NewV); + + ConvertedInsts[I] = NewV; + return NewV; +} + +// Perform dead code elimination on the instructions we just modified. +void Float2Int::cleanup() { + for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); + I != E; ++I) + I->first->eraseFromParent(); +} + +bool Float2Int::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); + // Clear out all state. + ECs = EquivalenceClasses<Instruction*>(); + SeenInsts.clear(); + ConvertedInsts.clear(); + Roots.clear(); + + Ctx = &F.getParent()->getContext(); + + findRoots(F, Roots); + + walkBackwards(Roots); + walkForwards(); + + bool Modified = validateAndTransform(); + if (Modified) + cleanup(); + return Modified; +} + +FunctionPass *llvm::createFloat2IntPass() { + return new Float2Int(); +} + diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c73e60f..97d5b6d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, DL)) return Offset; @@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, DL); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 51e8041..ab8e5b8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. 
//===----------------------------------------------------------------------===// -/// Check for expressions that ScalarEvolution generates to compute -/// BackedgeTakenInfo. If these expressions have not been reduced, then -/// expanding them may incur additional cost (albeit in the loop preheader). -static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, - SmallPtrSetImpl<const SCEV*> &Processed, - ScalarEvolution *SE) { - if (!Processed.insert(S).second) - return false; - - // If the backedge-taken count is a UDiv, it's very likely a UDiv that - // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a - // precise expression, rather than a UDiv from the user's code. If we can't - // find a UDiv in the code with some simple searching, assume the former and - // forego rewriting the loop. - if (isa<SCEVUDivExpr>(S)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return true; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != S) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != S) - return true; - } - } - - // Recurse past add expressions, which commonly occur in the - // BackedgeTakenCount. They may already exist in program code, and if not, - // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { - if (isHighCostExpansion(*I, BI, Processed, SE)) - return true; - } - return false; - } - - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. - return false; -} - /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. @@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint). /// However, we don't yet have a strong motivation for converting loop tests /// into inequality tests. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, + SCEVExpander &Rewriter) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || BackedgeTakenCount->isZero()) @@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { return false; // Can't rewrite non-branch yet. 
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) + if (!isa<BranchInst>(L->getExitingBlock()->getTerminator())) return false; - SmallPtrSet<const SCEV*, 8> Processed; - if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) + if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) return false; return true; @@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); - return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); + return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit"); } else { // In any other case, convert both IVInit and IVCount to integers before @@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); // Initialize CmpIndVar and IVCount to their preincremented values. Value *CmpIndVar = IndVar; @@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { + if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 1f33f72..c19cd19 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -41,9 +41,9 @@ struct PointerOffsetPair { }; struct LoadPOPPair { + LoadPOPPair() = default; LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) : Load(L), POP(P), InsertOrder(O) {} - LoadPOPPair() {} LoadInst *Load; PointerOffsetPair POP; /// \brief The new load needs to be created before the first load in IR order. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 8445d5f..099f227 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -28,7 +28,7 @@ // // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however // it's useful to think about these as the same register, with some uses using -// the value of the register before the add and some using // it after. In this +// the value of the register before the add and some using it after. In this // example, the icmp is a post-increment user, since it uses %i.next, which is // the value of the induction variable after the increment. The other common // case of post-increment users is users outside the loop. @@ -112,8 +112,6 @@ public: /// a particular register. 
SmallBitVector UsedByIndices; - RegSortData() {} - void print(raw_ostream &OS) const; void dump() const; }; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 600cbde..3de3b3d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -165,6 +165,7 @@ namespace { UP.MaxCount = UINT_MAX; UP.Partial = CurrentAllowPartial; UP.Runtime = CurrentRuntime; + UP.AllowExpensiveTripCount = false; TTI.getUnrollingPreferences(L, UP); } @@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, - &LPM, &AC)) + if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, + TripMultiple, LI, this, &LPM, &AC)) return false; return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2b5a078..d651473 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) RepeatInstruction = processMemMove(M); - else if (CallSite CS = (Value*)I) { + else if (auto CS = CallSite(I)) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) if (CS.isByValArgument(i)) MadeChange |= processByValArgument(CS, i); diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp new file mode 100644 index 0000000..fea7641 --- /dev/null +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -0,0 +1,252 @@ +//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates n-ary add expressions and eliminates the redundancy +// exposed by the reassociation. +// +// A motivating example: +// +// void foo(int a, int b) { +// bar(a + b); +// bar((a + 2) + b); +// } +// +// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify +// the above code to +// +// int t = a + b; +// bar(t); +// bar(t + 2); +// +// However, the Reassociate pass is unable to do that because it processes each +// instruction individually and believes (a + 2) + b is the best form according +// to its rank system. +// +// To address this limitation, NaryReassociate reassociates an expression in a +// form that reuses existing instructions. As a result, NaryReassociate can +// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that +// (a + b) is computed before. +// +// NaryReassociate works as follows. For every instruction in the form of (a + +// b) + c, it checks whether a + c or b + c is already computed by a dominating +// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + +// c) + a and removes the redundancy accordingly. To efficiently look up whether +// an expression is computed before, we store each instruction seen and its SCEV +// into an SCEV-to-instruction map.
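+//
+// As a hypothetical IR-level sketch of that lookup (value names invented here
+// for illustration):
+//
+//   %ac  = add i32 %a, %c   ; SeenExprs[SCEV(a + c)] now holds %ac
+//   %ab  = add i32 %a, %b
+//   %abc = add i32 %ab, %c  ; matches (a + b) + c; SCEV(a + c) is already
+//                           ; mapped to %ac, so %abc is rewritten to
+//                           ; add i32 %ac, %b and the old %abc is deleted.
+//
+// Although the algorithm pattern-matches only ternary additions, it +// automatically handles many >3-ary expressions by walking through the function +// in the depth-first order.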
For example, given +// +// (a + c) + d +// ((a + b) + c) + d +// +// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites +// ((a + c) + b) + d into ((a + c) + d) + b. +// +// Finally, the above dominator-based algorithm may need to be run for multiple +// iterations before emitting optimal code. One source of this need is that we +// only split an operand when it is used only once. The above algorithm can +// eliminate an instruction and decrease the usage count of its operands. As a +// result, an instruction that previously had multiple uses may become a +// single-use instruction and thus eligible for split consideration. For +// example, +// +// ac = a + c +// ab = a + b +// abc = ab + c +// ab2 = ab + b +// ab2c = ab2 + c +// +// In the first iteration, we cannot reassociate abc to ac+b because ab is used +// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a +// result, ab2 becomes dead and ab will be used only once in the second +// iteration. +// +// Limitations and TODO items: +// +// 1) We only consider n-ary adds for now. This should be extended and +// generalized. +// +// 2) Besides arithmetic operations, similar reassociation can be applied to +// GEPs. For example, if +// X = &arr[a] +// dominates +// Y = &arr[a + b] +// we may rewrite Y into X + b. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "nary-reassociate" + +namespace { +class NaryReassociate : public FunctionPass { +public: + static char ID; + + NaryReassociate(): FunctionPass(ID) { + initializeNaryReassociatePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesCFG(); + } + +private: + // Runs only one iteration of the dominator-based algorithm. See the header + // comments for why we need multiple iterations. + bool doOneIteration(Function &F); + // Reassociates I to a better form. + Instruction *tryReassociateAdd(Instruction *I); + // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed. + Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I); + // Rewrites I to LHS + RHS if LHS is computed already. + Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I); + + DominatorTree *DT; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + // A lookup table quickly telling which instructions compute the given SCEV. + // Note that there can be multiple instructions at different locations + // computing to the same SCEV, so we map a SCEV to an instruction list.
For + // example, + // + // if (p1) + // foo(a + b); + // if (p2) + // bar(a + b); + DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs; +}; +} // anonymous namespace + +char NaryReassociate::ID = 0; +INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) + +FunctionPass *llvm::createNaryReassociatePass() { + return new NaryReassociate(); +} + +bool NaryReassociate::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + bool Changed = false, ChangedInThisIteration; + do { + ChangedInThisIteration = doOneIteration(F); + Changed |= ChangedInThisIteration; + } while (ChangedInThisIteration); + return Changed; +} + +bool NaryReassociate::doOneIteration(Function &F) { + bool Changed = false; + SeenExprs.clear(); + // Traverse the dominator tree in the depth-first order. This order makes sure + // all bases of a candidate are in SeenExprs when we process it. + for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT); + Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) { + BasicBlock *BB = Node->getBlock(); + for (auto I = BB->begin(); I != BB->end(); ++I) { + if (I->getOpcode() == Instruction::Add) { + if (Instruction *NewI = tryReassociateAdd(I)) { + Changed = true; + SE->forgetValue(I); + I->replaceAllUsesWith(NewI); + RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + I = NewI; + } + // We should add the rewritten instruction because tryReassociateAdd may + // have invalidated the original one. + SeenExprs[SE->getSCEV(I)].push_back(I); + } + } + } + return Changed; +} + +Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) { + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + if (auto *NewI = tryReassociateAdd(LHS, RHS, I)) + return NewI; + if (auto *NewI = tryReassociateAdd(RHS, LHS, I)) + return NewI; + return nullptr; +} + +Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS, + Instruction *I) { + Value *A = nullptr, *B = nullptr; + // To be conservative, we reassociate I only when it is the only user of A+B. + if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) { + // I = (A + B) + RHS + // = (A + RHS) + B or (B + RHS) + A + const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); + const SCEV *RHSExpr = SE->getSCEV(RHS); + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I)) + return NewI; + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I)) + return NewI; + } + return nullptr; +} + +Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr, + Value *RHS, Instruction *I) { + auto Pos = SeenExprs.find(LHSExpr); + // Bail out if LHSExpr is not previously seen. + if (Pos == SeenExprs.end()) + return nullptr; + + auto &LHSCandidates = Pos->second; + // Look for the closest dominator LHS of I that computes LHSExpr, and replace + // I with LHS + RHS. + // + // Because we traverse the dominator tree in pre-order, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack.
This + // optimization makes the algorithm O(n). + while (!LHSCandidates.empty()) { + Instruction *LHS = LHSCandidates.back(); + if (DT->dominates(LHS, I)) { + Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I); + NewI->takeName(I); + return NewI; + } + LHSCandidates.pop_back(); + } + return nullptr; +} diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 944725a..536f2a6 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, BasicBlock *Current = Pred; while (true) { for (Instruction &I : *Current) { - if (CallSite CS = &I) + if (auto CS = CallSite(&I)) // Note: Technically, needing a safepoint isn't quite the right // condition here. We should instead be checking if the target method // has an @@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F, // We need to stop going forward as soon as we see a call that can // grow the stack (i.e. the call target has a non-zero frame // size). - if (CallSite CS = cursor) { - (void)CS; // Silence an unused variable warning by gcc 4.8.2 + if (CallSite(cursor)) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) { // llvm.assume(...) are not really calls. if (II->getIntrinsicID() == Intrinsic::assume) { diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f5d21ff..ba48e0a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden, // Print the liveset found at the insert location static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false)); -static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, + cl::init(false)); // Print out the base pointers for debugging -static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, + cl::init(false)); + +#ifdef XDEBUG +static bool ClobberNonLive = true; +#else +static bool ClobberNonLive = false; +#endif +static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", + cl::location(ClobberNonLive), + cl::Hidden); namespace { struct RewriteStatepointsForGC : public FunctionPass { @@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) namespace { +struct GCPtrLivenessData { + /// Values defined in this block. + DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; + /// Values used in this block (and thus live); does not include values + /// killed within this block. + DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; + + /// Values live into this basic block (i.e. used by any + /// instruction in this basic block or ones reachable from here) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; + + /// Values live out of this basic block (i.e.
live into + /// any successor block) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; +}; + // The type of the internal cache used inside the findBasePointers family // of functions. From the callers perspective, this is an opaque type and // should not be inspected. @@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord { /// Mapping from live pointers to a base-defining-value DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - /// Any new values which were added to the IR during base pointer analysis - /// for this safepoint - DenseSet<llvm::Value *> NewInsertedDefs; - /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. Instruction *StatepointToken; @@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord { }; } +/// Compute the live-in set for every basic block in the function +static void computeLiveInValues(DominatorTree &DT, Function &F, + GCPtrLivenessData &Data); + +/// Given results from the dataflow liveness computation, find the set of live +/// Values at a particular instruction. +static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, + StatepointLiveSetTy &out); + // TODO: Once we can get to the GCStrategy, this becomes // Optional<bool> isGCManagedPointer(const Value *V) const override { @@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) { return false; } -/// Return true if the Value is a gc reference type which is potentially used -/// after the instruction 'loc'. This is only used with the edge reachability -/// liveness code. Note: It is assumed the V dominates loc. -static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT, - LoopInfo *LI) { - if (!isGCPointerType(V.getType())) - return false; - - if (V.use_empty()) - return false; - - // Given assumption that V dominates loc, this may be live - return true; +// Return true if this type is one which a) is a gc pointer or contains a GC +// pointer and b) is of a type this code expects to encounter as a live value. +// (The insertion code will assert that a type which matches (a) and not (b) +// is not encountered.) +static bool isHandledGCPointerType(Type *T) { + // We fully support gc pointers + if (isGCPointerType(T)) + return true; + // We partially support vectors of gc pointers. The code will assert if it + // can't handle something. + if (auto VT = dyn_cast<VectorType>(T)) + if (isGCPointerType(VT->getElementType())) + return true; + return false; } #ifndef NDEBUG -static bool isAggWhichContainsGCPtrType(Type *Ty) { +/// Returns true if this type contains a gc pointer whether we know how to +/// handle that type or not. +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; if (VectorType *VT = dyn_cast<VectorType>(Ty)) return isGCPointerType(VT->getScalarType()); if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) - return isGCPointerType(AT->getElementType()) || - isAggWhichContainsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType()); if (StructType *ST = dyn_cast<StructType>(Ty)) - return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), - [](Type *SubType) { - return isGCPointerType(SubType) || - isAggWhichContainsGCPtrType(SubType); - }); + return std::any_of( + ST->subtypes().begin(), ST->subtypes().end(), + [](Type *SubType) { return containsGCPtrType(SubType); }); return false; } -#endif - -// Conservatively identifies any definitions which might be live at the -// given instruction. 
The analysis is performed immediately before the -// given instruction. Values defined by that instruction are not considered -// live. Values used by that instruction are considered live. -// -// preconditions: valid IR graph, term is either a terminator instruction or -// a call instruction, pred is the basic block of term, DT, LI are valid -// -// side effects: none, does not mutate IR -// -// postconditions: populates liveValues as discussed above -static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, - DominatorTree &DT, LoopInfo *LI, - StatepointLiveSetTy &liveValues) { - liveValues.clear(); - - assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator()); - Function *F = pred->getParent(); - - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); }; - - // Are there any gc pointer arguments live over this point? This needs to be - // special cased since arguments aren't defined in basic blocks. - for (Argument &arg : F->args()) { - assert(!isAggWhichContainsGCPtrType(arg.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(arg)) { - liveValues.insert(&arg); - } - } - - // Walk through all dominating blocks - the ones which can contain - // definitions used in this block - and check to see if any of the values - // they define are used in locations potentially reachable from the - // interesting instruction. - BasicBlock *BBI = pred; - while (true) { - if (TraceLSP) { - errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n"; - } - assert(DT.dominates(BBI, pred)); - assert(isPotentiallyReachable(BBI, pred, &DT) && - "dominated block must be reachable"); - - // Walk through the instructions in dominating blocks and keep any - // that have a use potentially reachable from the block we're - // considering putting the safepoint in - for (Instruction &inst : *BBI) { - if (TraceLSP) { - errs() << "[LSP] Looking at instruction "; - inst.dump(); - } - - if (pred == BBI && (&inst) == term) { - if (TraceLSP) { - errs() << "[LSP] stopped because we encountered the safepoint " - "instruction.\n"; - } - - // If we're in the block which defines the interesting instruction, - // we don't want to include any values as live which are defined - // _after_ the interesting line or as part of the line itself - // i.e. "term" is the call instruction for a call safepoint, the - // results of the call should not be considered live in that stackmap - break; - } - - assert(!isAggWhichContainsGCPtrType(inst.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(inst)) { - if (TraceLSP) { - errs() << "[LSP] found live value for this safepoint "; - inst.dump(); - term->dump(); - } - liveValues.insert(&inst); - } - } - if (!DT.getNode(BBI)->getIDom()) { - assert(BBI == &F->getEntryBlock() && - "failed to find a dominator for something other than " - "the entry block"); - break; - } - BBI = DT.getNode(BBI)->getIDom()->getBlock(); - } +// Returns true if this is a type which a) is a gc pointer or contains a GC +// pointer and b) is of a type which the code doesn't expect (i.e. first class +// aggregates). Used to trip assertions. +static bool isUnhandledGCPointerType(Type *Ty) { + return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); } +#endif static bool order_by_name(llvm::Value *a, llvm::Value *b) { if (a->hasName() && b->hasName()) { @@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) { } } -/// Find the initial live set. 
Note that due to base pointer -/// insertion, the live set may be incomplete. -static void -analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, - PartiallyConstructedSafepointRecord &result) { +// Conservatively identifies any definitions which might be live at the +// given instruction. The analysis is performed immediately before the +// given instruction. Values defined by that instruction are not considered +// live. Values used by that instruction are considered live. +static void analyzeParsePointLiveness( + DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, + const CallSite &CS, PartiallyConstructedSafepointRecord &result) { Instruction *inst = CS.getInstruction(); - BasicBlock *BB = inst->getParent(); StatepointLiveSetTy liveset; - findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset); + findLiveSetAtInst(inst, OriginalLivenessData, liveset); if (PrintLiveSet) { // Note: This output is used by several of the test cases @@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, result.liveset = liveset; } -/// True iff this value is the null pointer constant (of any pointer type) -static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) { - return isa<Constant>(V) && isa<PointerType>(V->getType()) && - cast<Constant>(V)->isNullValue(); +/// If we can trivially determine that this vector contains only base pointers, +/// return the base instruction. +static Value *findBaseOfVector(Value *I) { + assert(I->getType()->isVectorTy() && + cast<VectorType>(I->getType())->getElementType()->isPointerTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + + // Each case parallels findBaseDefiningValue below, see that code for + // detailed motivation. + + if (isa<Argument>(I)) + // An incoming argument to the function is a base pointer + return I; + + // We shouldn't see the address of a global as a vector value? + assert(!isa<GlobalVariable>(I) && + "unexpected global variable found in base of vector"); + + // inlining could possibly introduce phi node that contains + // undef if callee has multiple returns + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with partially optimized + // code. + return I; + + // Due to inheritance, this must be _after_ the global variable and undef + // checks + if (Constant *Con = dyn_cast<Constant>(I)) { + assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && + "order of checks wrong!"); + assert(Con->isNullValue() && "null is the only case which makes sense"); + return Con; + } + + if (isa<LoadInst>(I)) + return I; + + // Note: This code is currently rather incomplete. We are essentially only + // handling cases where the vector element is trivially a base pointer. We + // need to update the entire base pointer construction algorithm to know how + // to track vector elements and potentially scalarize, but the case which + // would motivate the work hasn't shown up in real workloads yet. + llvm_unreachable("no base found for vector element"); } /// Helper function for findBasePointer - Will return a value which either a) @@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) { assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // There are instructions which can never return gc pointer values. Sanity - // check - // that this is actually true. 
- assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && - !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers"); - assert((!isa<Instruction>(I) || isa<InvokeInst>(I) || - !cast<Instruction>(I)->isTerminator()) && - "With the exception of invoke terminators don't define values"); - assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) && - "Can't be definitions to start with"); - assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) && - "Comparisons don't give ops"); - // There's a bunch of instructions which just don't make sense to apply to - // a pointer. The only valid reason for this would be pointer bit - // twiddling which we're just not going to support. - assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) && - "Binary ops on pointer values are meaningless. Unless your " - "bit-twiddling which we don't support"); - - if (Argument *Arg = dyn_cast<Argument>(I)) { + // This case is a bit of a hack - it only handles extracts from vectors which + // trivially contain only base pointers. See note inside the function for + // how to improve this. + if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { + Value *VectorOperand = EEI->getVectorOperand(); + Value *VectorBase = findBaseOfVector(VectorOperand); + (void)VectorBase; + assert(VectorBase && "extract element not known to be a trivial base"); + return EEI; + } + + if (isa<Argument>(I)) // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - assert(Arg->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return Arg; - } + return I; - if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) { + if (isa<GlobalVariable>(I)) // base case - assert(global->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return global; - } + return I; // inlining could possibly introduce phi node that contains // undef if callee has multiple returns - if (UndefValue *undef = dyn_cast<UndefValue>(I)) { - assert(undef->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return undef; // utterly meaningless, but useful for dealing with - // partially optimized code. - } + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with + // partially optimized code. + return I; // Due to inheritance, this must be _after_ the global variable and undef // checks - if (Constant *con = dyn_cast<Constant>(I)) { + if (Constant *Con = dyn_cast<Constant>(I)) { assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && "order of checks wrong!"); // Note: Finding a constant base for something marked for relocation @@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) { // off a potentially null value and have proven it null. We also use // null pointers in dead paths of relocation phis (which we might later // want to find a base pointer for). - assert(con->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - assert(con->isNullValue() && "null is the only case which makes sense"); - return con; + assert(isa<ConstantPointerNull>(Con) && + "null is the only case which makes sense"); + return Con; } if (CastInst *CI = dyn_cast<CastInst>(I)) { - Value *def = CI->stripPointerCasts(); - assert(def->getType()->isPointerTy() && - "Base for pointer must be another pointer"); + Value *Def = CI->stripPointerCasts(); // If we find a cast instruction here, it means we've found a cast which is // not simply a pointer cast (i.e. an inttoptr). 
We don't know how to // handle int->ptr conversion. - assert(!isa<CastInst>(def) && "shouldn't find another cast here"); - return findBaseDefiningValue(def); + assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); + return findBaseDefiningValue(Def); } - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getType()->isPointerTy()) { - Value *Op = LI->getOperand(0); - (void)Op; - // Has to be a pointer to an gc object, or possibly an array of such? - assert(Op->getType()->isPointerTy()); - return LI; // The value loaded is an gc base itself - } - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - Value *Op = GEP->getOperand(0); - if (Op->getType()->isPointerTy()) { - return findBaseDefiningValue(Op); // The base of this GEP is the base - } - } + if (isa<LoadInst>(I)) + return I; // The value loaded is an gc base itself - if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) { - // An alloca represents a conceptual stack slot. It's the slot itself - // that the GC needs to know about, not the value in the slot. - assert(alloc->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return alloc; - } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + // The base of this GEP is the base + return findBaseDefiningValue(GEP->getPointerOperand()); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { + case Intrinsic::experimental_gc_result_ptr: default: // fall through to general call handling break; @@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) { case Intrinsic::experimental_gc_result_float: case Intrinsic::experimental_gc_result_int: llvm_unreachable("these don't produce pointers"); - case Intrinsic::experimental_gc_result_ptr: - // This is just a special case of the CallInst check below to handle a - // statepoint with deopt args which hasn't been rewritten for GC yet. - // TODO: Assert that the statepoint isn't rewritten yet. - return II; case Intrinsic::experimental_gc_relocate: { // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, @@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) { // We assume that functions in the source language only return base // pointers. This should probably be generalized via attributes to support // both source language and internal functions. - if (CallInst *call = dyn_cast<CallInst>(I)) { - assert(call->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return call; - } - if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { - assert(invoke->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return invoke; - } + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + return I; // I have absolutely no idea how to implement this part yet. It's not // neccessarily hard, I just haven't really looked at it yet. assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); - if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) { + if (isa<AtomicCmpXchgInst>(I)) // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it - // like a load. We loaded a pointer from a address in memory, that value - // had better be a valid base pointer. 
- return cas->getPointerOperand(); - } - if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) { - assert(AtomicRMWInst::Xchg == atomic->getOperation() && - "All others are binary ops which don't apply to base pointers"); - // semantically, a load, store pair. Treat it the same as a standard load - return atomic->getPointerOperand(); - } + // like a load. + return I; + + assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " + "binary ops which don't apply to pointers"); // The aggregate ops. Aggregates can either be in the heap or on the // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. - if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) { - return ev; - } + if (isa<ExtractValueInst>(I)) + return I; // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. @@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) { // return a value which dynamically selects from amoung several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. - if (SelectInst *select = dyn_cast<SelectInst>(I)) { - return select; - } - - return cast<PHINode>(I); + assert((isa<SelectInst>(I) || isa<PHINode>(I)) && + "missing instruction case in findBaseDefiningValing"); + return I; } /// Returns the base defining value for this value. -static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { - Value *&Cached = cache[I]; +static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { + Value *&Cached = Cache[I]; if (!Cached) { Cached = findBaseDefiningValue(I); } - assert(cache[I] != nullptr); + assert(Cache[I] != nullptr); if (TraceLSP) { - errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() + dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() << "\n"; } return Cached; @@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { /// Return a base pointer for this value if known. Otherwise, return it's /// base defining value. -static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) { - Value *def = findBaseDefiningValueCached(I, cache); - auto Found = cache.find(def); - if (Found != cache.end()) { +static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { + Value *Def = findBaseDefiningValueCached(I, Cache); + auto Found = Cache.find(Def); + if (Found != Cache.end()) { // Either a base-of relation, or a self reference. Caller must check. return Found->second; } // Only a BDV available - return def; + return Def; } /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. -static bool isKnownBaseResult(Value *v) { - if (!isa<PHINode>(v) && !isa<SelectInst>(v)) { +static bool isKnownBaseResult(Value *V) { + if (!isa<PHINode>(V) && !isa<SelectInst>(V)) { // no recursion possible return true; } - if (cast<Instruction>(v)->getMetadata("is_base_value")) { + if (isa<Instruction>(V) && + cast<Instruction>(V)->getMetadata("is_base_value")) { // This is a previously inserted base phi or select. We know // that this is a base value. return true; @@ -647,8 +580,7 @@ private: /// from. For gc objects, this is simply itself. On success, returns a value /// which is the base pointer. (This is reliable and can be used for /// relocation.) 
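As an aside, before findBasePointer's documentation continues below: the case analysis above reduces to a short recursive walk over the defining value's kind. A minimal standalone sketch of that walk, using a hypothetical toy Node type rather than the pass's llvm::Value hierarchy:

#include <cassert>

// Toy stand-in for llvm::Value covering only the cases discussed above.
struct Node {
  enum Kind { Arg, Global, Load, Call, Gep, Cast, Phi, Select } K;
  Node *Operand; // pointer operand for Gep/Cast, null otherwise
};

// Walk def chains until reaching either a definition which must itself be a
// base pointer (argument, global, load, call) or a merge (phi/select) which
// the caller still has to resolve.
static Node *findBaseDefiningValueSketch(Node *N) {
  if (N->K == Node::Gep || N->K == Node::Cast) {
    assert(N->Operand && "gep/cast must have a pointer operand");
    return findBaseDefiningValueSketch(N->Operand); // an offset off some base
  }
  return N; // Arg/Global/Load/Call are bases; Phi/Select are "not known yet"
}

int main() {
  Node A = {Node::Arg, nullptr};
  Node C = {Node::Cast, &A};
  Node G = {Node::Gep, &C};
  assert(findBaseDefiningValueSketch(&G) == &A); // base of gep(cast(arg))
}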
On failure, returns nullptr. -static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { Value *def = findBaseOrBDV(I, cache); if (isKnownBaseResult(def)) { @@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, done = true; // Since we're adding elements to 'states' as we run, we can't keep // iterators into the set. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // We want to keep naming deterministic in the loop that follows, so // sort the keys before iteration. This is useful in allowing us to // write stable tests. Note that there is no invalidation issue here. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (isa<PHINode>(v)) { int num_preds = std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); assert(num_preds > 0 && "how did we reach here"); PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - NewInsertedDefs.insert(phi); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, UndefValue *undef = UndefValue::get(sel->getType()); SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, undef, "base_select", sel); - NewInsertedDefs.insert(basesel); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { PHINode *phi = cast<PHINode>(v); unsigned NumPHIValues = phi->getNumIncomingValues(); @@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(states.count(base)); base = states[base].getBase(); assert(base != nullptr && "unknown PhiState!"); - assert(NewInsertedDefs.count(base) && - "should have already added this in a prev. iteration!"); } // In essense this assert states: the only way two @@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, if (base->getType() != basephi->getType()) { base = new BitCastInst(base, basephi->getType(), "cast", InBB->getTerminator()); - NewInsertedDefs.insert(base); } basephi->addIncoming(base, InBB); } @@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // The cast is needed since base traversal may strip away bitcasts if (base->getType() != basesel->getType()) { base = new BitCastInst(base, basesel->getType(), "cast", basesel); - NewInsertedDefs.insert(base); } basesel->setOperand(i, base); } @@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // post condition: PointerToBase contains one (derived, base) pair for every // pointer in live. 
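As an aside, before the post-condition comment continues below: the fixed point above runs over a three-point lattice of unknown, base, and conflict states. A sketch of the merge rule it effectively implements, with hypothetical standalone types in place of the pass's PhiState:

#include <cassert>

enum class BaseState { Unknown, Base, Conflict };
struct LatticeVal {
  BaseState S;
  const void *BaseVal; // the known base when S == Base
};

// Merging operand states at a phi/select: Unknown is the identity; two
// distinct known bases merge to Conflict, which is what later forces
// insertion of a "base_phi"/"base_select" to pick the base dynamically.
static LatticeVal meet(LatticeVal A, LatticeVal B) {
  if (A.S == BaseState::Unknown)
    return B;
  if (B.S == BaseState::Unknown)
    return A;
  if (A.S == BaseState::Conflict || B.S == BaseState::Conflict ||
      A.BaseVal != B.BaseVal)
    return {BaseState::Conflict, nullptr};
  return A; // the same base along every path
}

int main() {
  int X, Y;
  LatticeVal BX = {BaseState::Base, &X}, BY = {BaseState::Base, &Y};
  assert(meet(BX, BX).S == BaseState::Base);
  assert(meet(BX, BY).S == BaseState::Conflict); // needs a base_phi
}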
Note that derived can be equal to base if the original // pointer was a base pointer. -static void findBasePointers(const StatepointLiveSetTy &live, - DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, - DominatorTree *DT, DefiningValueMapTy &DVCache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static void +findBasePointers(const StatepointLiveSetTy &live, + DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, + DominatorTree *DT, DefiningValueMapTy &DVCache) { // For the naming of values inserted to be deterministic - which makes for // much cleaner and more stable tests - we need to assign an order to the // live values. DenseSets do not provide a deterministic order across runs. - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.insert(Temp.end(), live.begin(), live.end()); std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *ptr : Temp) { - Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); + Value *base = findBasePointer(ptr, DVCache); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || @@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live, // If you see this trip and like to live really dangerously, the code should // be correct, just with idioms the verifier can't handle. You can try // disabling the verifier at your own substaintial risk. - assert(!isNullConstant(base) && "the relocation code needs adjustment to " - "handle the relocation of a null pointer " - "constant without causing false positives " - "in the safepoint ir verifier."); + assert(!isa<ConstantPointerNull>(base) && + "the relocation code needs adjustment to handle the relocation of " + "a null pointer constant without causing false positives in the " + "safepoint ir verifier."); } } @@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, const CallSite &CS, PartiallyConstructedSafepointRecord &result) { DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - DenseSet<llvm::Value *> NewInsertedDefs; - findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); + findBasePointers(result.liveset, PointerToBase, &DT, DVCache); if (PrintBasePointers) { // Note: Need to print these in a stable order since this is checked in // some tests. 
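A short aside, since the stable-order idiom recurs throughout this patch: DenseSet and DenseMap iteration order varies from run to run, so every user-visible traversal copies into a vector and sorts first. The pattern in miniature, with hypothetical standalone types (the printing code resumes below):

#include <algorithm>
#include <string>
#include <unordered_set>
#include <vector>

struct Val { std::string Name; };

// Copy out of the unordered container and sort by name so printed output
// (and any value names derived from the order) is deterministic.
static std::vector<Val *> stableOrder(const std::unordered_set<Val *> &S) {
  std::vector<Val *> Temp(S.begin(), S.end());
  std::sort(Temp.begin(), Temp.end(),
            [](Val *A, Val *B) { return A->Name < B->Name; });
  return Temp;
}

int main() {
  Val A = {"a"}, B = {"b"};
  std::unordered_set<Val *> S = {&B, &A};
  return stableOrder(S).front()->Name == "a" ? 0 : 1;
}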
errs() << "Base Pairs (w/o Relocation):\n"; - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.reserve(PointerToBase.size()); for (auto Pair : PointerToBase) { Temp.push_back(Pair.first); @@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *Ptr : Temp) { Value *Base = PointerToBase[Ptr]; - errs() << " derived %" << Ptr->getName() << " base %" - << Base->getName() << "\n"; + errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() + << "\n"; } } result.PointerToBase = PointerToBase; - result.NewInsertedDefs = NewInsertedDefs; } -/// Check for liveness of items in the insert defs and add them to the live -/// and base pointer sets -static void fixupLiveness(DominatorTree &DT, const CallSite &CS, - const DenseSet<Value *> &allInsertedDefs, - PartiallyConstructedSafepointRecord &result) { - Instruction *inst = CS.getInstruction(); - - auto liveset = result.liveset; - auto PointerToBase = result.PointerToBase; +/// Given an updated version of the dataflow liveness results, update the +/// liveset and base pointer maps for the call site CS. +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, + const CallSite &CS, + PartiallyConstructedSafepointRecord &result); - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); }; - - // For each new definition, check to see if a) the definition dominates the - // instruction we're interested in, and b) one of the uses of that definition - // is edge-reachable from the instruction we're interested in. This is the - // same definition of liveness we used in the intial liveness analysis - for (Value *newDef : allInsertedDefs) { - if (liveset.count(newDef)) { - // already live, no action needed - continue; - } - - // PERF: Use DT to check instruction domination might not be good for - // compilation time, and we could change to optimal solution if this - // turn to be a issue - if (!DT.dominates(cast<Instruction>(newDef), inst)) { - // can't possibly be live at inst - continue; - } - - if (is_live_gc_reference(*newDef)) { - // Add the live new defs into liveset and PointerToBase - liveset.insert(newDef); - PointerToBase[newDef] = newDef; - } - } - - result.liveset = liveset; - result.PointerToBase = PointerToBase; -} - -static void fixupLiveReferences( - Function &F, DominatorTree &DT, Pass *P, - const DenseSet<llvm::Value *> &allInsertedDefs, - ArrayRef<CallSite> toUpdate, +static void recomputeLiveInValues( + Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + // TODO-PERF: reuse the original liveness, then simply run the dataflow + // again. The old values are still live and will help it stablize quickly. + GCPtrLivenessData RevisedLivenessData; + computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; const CallSite &CS = toUpdate[i]; - fixupLiveness(DT, CS, allInsertedDefs, info); + recomputeLiveInValues(RevisedLivenessData, CS, info); } } -// Normalize basic block to make it ready to be target of invoke statepoint. -// It means spliting it to have single predecessor. Return newly created BB -// ready to be successor of invoke statepoint. 
-static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB, - BasicBlock *InvokeParent, - Pass *P) { - BasicBlock *ret = BB; - +// When inserting gc.relocate calls, we need to ensure there are no uses +// of the original value between the gc.statepoint and the gc.relocate call. +// One case which can arise is a phi node starting one of the successor blocks. +// We also need to be able to insert the gc.relocates only on the path which +// goes through the statepoint. We might need to split an edge to make this +// possible. +static BasicBlock * +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { + DominatorTree *DT = nullptr; + if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DT = &DTP->getDomTree(); + + BasicBlock *Ret = BB; if (!BB->getUniquePredecessor()) { - ret = SplitBlockPredecessors(BB, InvokeParent, ""); + Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); } - // Another requirement for such basic blocks is to not have any phi nodes. - // Since we just ensured that new BB will have single predecessor, - // all phi nodes in it will have one value. Here it would be naturall place - // to - // remove them all. But we can not do this because we are risking to remove - // one of the values stored in liveset of another statepoint. We will do it - // later after placing all safepoints. + // Now that 'ret' has unique predecessor we can safely remove all phi nodes + // from it + FoldSingleEntryPHINodes(Ret); + assert(!isa<PHINode>(Ret->begin())); - return ret; + // At this point, we can safely insert a gc.relocate as the first instruction + // in Ret if needed. + return Ret; } static int find_index(ArrayRef<Value *> livevec, Value *val) { @@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, // combination. This results is some blow up the function declarations in // the IR, but removes the need for argument bitcasts which shrinks the IR // greatly and makes it much more readable. - SmallVector<Type *, 1> types; // one per 'any' type + SmallVector<Type *, 1> types; // one per 'any' type types.push_back(liveVariables[i]->getType()); // result type Value *gc_relocate_decl = Intrinsic::getDeclaration( M, Intrinsic::experimental_gc_relocate, types); @@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ token = invoke; // Generate gc relocates in exceptional path - BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint( - toReplace->getUnwindDest(), invoke->getParent(), P); + BasicBlock *unwindBlock = toReplace->getUnwindDest(); + assert(!isa<PHINode>(unwindBlock->begin()) && + unwindBlock->getUniquePredecessor() && + "can't safely insert in this block!"); Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ exceptional_token, Builder); // Generate gc relocates and returns for normal block - BasicBlock *normalDest = normalizeBBForInvokeSafepoint( - toReplace->getNormalDest(), invoke->getParent(), P); + BasicBlock *normalDest = toReplace->getNormalDest(); + assert(!isa<PHINode>(normalDest->begin()) && + normalDest->getUniquePredecessor() && + "can't safely insert in this block!"); IP = &*(normalDest->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Take the name of the original value call if it had one. 
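As an aside on CreateGCRelocates above: each gc.relocate identifies its base and derived pointer purely by position within the statepoint's gc-argument section. Schematically, with toy standalone types and a hypothetical LiveStart offset (the statepoint rewriting resumes below):

#include <cassert>
#include <vector>

struct Val {};

// Mirror of find_index() above: position of V inside the live vector.
static int indexOf(const std::vector<Val *> &Live, Val *V) {
  for (int i = 0, e = (int)Live.size(); i != e; i++)
    if (Live[i] == V)
      return i;
  assert(false && "value not in live set");
  return -1;
}

// The i-th relocate names its base and derived pointers as absolute operand
// indices; LiveStart is where the gc arguments begin in the statepoint.
struct RelocateIndices { int Base, Derived; };
static RelocateIndices indicesFor(const std::vector<Val *> &Live,
                                  const std::vector<Val *> &Bases,
                                  int LiveStart, int i) {
  return {LiveStart + indexOf(Live, Bases[(size_t)i]), LiveStart + i};
}

int main() {
  Val A, B; // A is the base of the derived pointer B
  std::vector<Val *> Live = {&A, &B}, Bases = {&A, &A};
  RelocateIndices RI = indicesFor(Live, Bases, /*LiveStart=*/5, /*i=*/1);
  assert(RI.Base == 5 && RI.Derived == 6);
}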
token->takeName(CS.getInstruction()); - // The GCResult is already inserted, we just need to find it +// The GCResult is already inserted, we just need to find it #ifndef NDEBUG Instruction *toReplace = CS.getInstruction(); assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && @@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Second, create a gc.relocate for every live variable CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); - } namespace { @@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec, // Replace an existing gc.statepoint with a new one and a set of gc.relocates // which make the relocations happening at this safepoint explicit. -// +// // WARNING: Does not do any fixup to adjust users of the original live // values. That's the callers responsibility. static void @@ -1458,14 +1355,13 @@ static void relocationViaAlloca( Function &F, DominatorTree &DT, ArrayRef<Value *> live, ArrayRef<struct PartiallyConstructedSafepointRecord> records) { #ifndef NDEBUG - int initialAllocaNum = 0; - - // record initial number of allocas - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum++; - } + // record initial number of (static) allocas; we'll check we have the same + // number when we get done. + int InitialAllocaNum = 0; + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum++; #endif // TODO-PERF: change data structures, reserve @@ -1505,47 +1401,49 @@ static void relocationViaAlloca( // In case if it was invoke statepoint // we will insert stores for exceptional path gc relocates. if (isa<InvokeInst>(Statepoint)) { - insertRelocationStores(info.UnwindToken->users(), - allocaMap, visitedLiveValues); + insertRelocationStores(info.UnwindToken->users(), allocaMap, + visitedLiveValues); } -#ifndef NDEBUG - // As a debuging aid, pretend that an unrelocated pointer becomes null at - // the gc.statepoint. This will turn some subtle GC problems into slightly - // easier to debug SEGVs - SmallVector<AllocaInst *, 64> ToClobber; - for (auto Pair : allocaMap) { - Value *Def = Pair.first; - AllocaInst *Alloca = cast<AllocaInst>(Pair.second); - - // This value was relocated - if (visitedLiveValues.count(Def)) { - continue; + if (ClobberNonLive) { + // As a debuging aid, pretend that an unrelocated pointer becomes null at + // the gc.statepoint. This will turn some subtle GC problems into + // slightly easier to debug SEGVs. Note that on large IR files with + // lots of gc.statepoints this is extremely costly both memory and time + // wise. 
+ SmallVector<AllocaInst *, 64> ToClobber; + for (auto Pair : allocaMap) { + Value *Def = Pair.first; + AllocaInst *Alloca = cast<AllocaInst>(Pair.second); + + // This value was relocated + if (visitedLiveValues.count(Def)) { + continue; + } + ToClobber.push_back(Alloca); } - ToClobber.push_back(Alloca); - } - auto InsertClobbersAt = [&](Instruction *IP) { - for (auto *AI : ToClobber) { - auto AIType = cast<PointerType>(AI->getType()); - auto PT = cast<PointerType>(AIType->getElementType()); - Constant *CPN = ConstantPointerNull::get(PT); - StoreInst *store = new StoreInst(CPN, AI); - store->insertBefore(IP); - } - }; + auto InsertClobbersAt = [&](Instruction *IP) { + for (auto *AI : ToClobber) { + auto AIType = cast<PointerType>(AI->getType()); + auto PT = cast<PointerType>(AIType->getElementType()); + Constant *CPN = ConstantPointerNull::get(PT); + StoreInst *store = new StoreInst(CPN, AI); + store->insertBefore(IP); + } + }; - // Insert the clobbering stores. These may get intermixed with the - // gc.results and gc.relocates, but that's fine. - if (auto II = dyn_cast<InvokeInst>(Statepoint)) { - InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); - InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); - } else { - BasicBlock::iterator Next(cast<CallInst>(Statepoint)); - Next++; - InsertClobbersAt(Next); + // Insert the clobbering stores. These may get intermixed with the + // gc.results and gc.relocates, but that's fine. + if (auto II = dyn_cast<InvokeInst>(Statepoint)) { + InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); + InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); + } else { + BasicBlock::iterator Next(cast<CallInst>(Statepoint)); + Next++; + InsertClobbersAt(Next); + } } -#endif } // update use with load allocas and add store for gc_relocated for (auto Pair : allocaMap) { @@ -1603,11 +1501,11 @@ static void relocationViaAlloca( assert(!inst->isTerminator() && "The only TerminatorInst that can produce a value is " "InvokeInst which is handled above."); - store->insertAfter(inst); + store->insertAfter(inst); } } else { assert((isa<Argument>(def) || isa<GlobalVariable>(def) || - (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && + isa<ConstantPointerNull>(def)) && "Must be argument or global"); store->insertAfter(cast<Instruction>(alloca)); } @@ -1621,12 +1519,11 @@ static void relocationViaAlloca( } #ifndef NDEBUG - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum--; - } - assert(initialAllocaNum == 0 && "We must not introduce any extra allocas"); + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum--; + assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); #endif } @@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { } } -static Function *getUseHolder(Module &M) { - FunctionType *ftype = - FunctionType::get(Type::getVoidTy(M.getContext()), true); - Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype)); - return Func; -} - /// Insert holders so that each Value is obviously live through the entire -/// liftetime of the call. +/// lifetime of the call. 
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
-                                SmallVectorImpl<CallInst *> &holders) {
+                                SmallVectorImpl<CallInst *> &Holders) {
+  if (Values.empty())
+    // No values to hold live, might as well not insert the empty holder
+    return;
+
   Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
-  Function *Func = getUseHolder(*M);
+  // Use a dummy vararg function to actually hold the values live
+  Function *Func = cast<Function>(M->getOrInsertFunction(
+      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
   if (CS.isCall()) {
     // For call safepoints insert dummy calls right after safepoint
-    BasicBlock::iterator next(CS.getInstruction());
-    next++;
-    CallInst *base_holder = CallInst::Create(Func, Values, "", next);
-    holders.push_back(base_holder);
-  } else if (CS.isInvoke()) {
-    // For invoke safepooints insert dummy calls both in normal and
-    // exceptional destination blocks
-    InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
-    CallInst *normal_holder = CallInst::Create(
-        Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt());
-    CallInst *unwind_holder = CallInst::Create(
-        Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt());
-    holders.push_back(normal_holder);
-    holders.push_back(unwind_holder);
-  } else
-    llvm_unreachable("unsupported call type");
+    BasicBlock::iterator Next(CS.getInstruction());
+    Next++;
+    Holders.push_back(CallInst::Create(Func, Values, "", Next));
+    return;
+  }
+  // For invoke safepoints insert dummy calls both in normal and
+  // exceptional destination blocks
+  auto *II = cast<InvokeInst>(CS.getInstruction());
+  Holders.push_back(CallInst::Create(
+      Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
+  Holders.push_back(CallInst::Create(
+      Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
 }

 static void findLiveReferences(
     Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+  GCPtrLivenessData OriginalLivenessData;
+  computeLiveInValues(DT, F, OriginalLivenessData);
   for (size_t i = 0; i < records.size(); i++) {
     struct PartiallyConstructedSafepointRecord &info = records[i];
     const CallSite &CS = toUpdate[i];
-    analyzeParsePointLiveness(DT, CS, info);
+    analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
   }
 }

-static void addBasesAsLiveValues(StatepointLiveSetTy &liveset,
-                                 DenseMap<Value *, Value *> &PointerToBase) {
-  // Identify any base pointers which are used in this safepoint, but not
-  // themselves relocated. We need to relocate them so that later inserted
-  // safepoints can get the properly relocated base register.
-  DenseSet<Value *> missing;
-  for (Value *L : liveset) {
-    assert(PointerToBase.find(L) != PointerToBase.end());
-    Value *base = PointerToBase[L];
-    assert(base);
-    if (liveset.find(base) == liveset.end()) {
-      assert(PointerToBase.find(base) == PointerToBase.end());
-      // uniqued by set insert
-      missing.insert(base);
+/// Remove any vector of pointers from the liveset by scalarizing them over the
+/// statepoint instruction. Adds the scalarized pieces to the liveset. It
+/// would be preferable to include the vector in the statepoint itself, but
+/// the lowering code currently does not handle that. Extending it would be
+/// slightly non-trivial since it requires a format change. Given how rare
+/// such cases are (for the moment?), scalarizing is an acceptable compromise.
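In outline, before the implementation below: each live vector-of-pointers is decomposed into scalar lanes ahead of the statepoint and reassembled afterwards, so only scalar pointers ever need relocating. A standalone caricature of that round trip, with a toy fixed-width vector and a hypothetical relocate stand-in:

#include <array>

using PtrVec = std::array<void *, 4>;

// Stand-in for whatever the collector does to each live scalar pointer.
static void *relocate(void *P) { return P; }

static PtrVec roundTripThroughSafepoint(const PtrVec &V) {
  void *Lane[4];
  for (int i = 0; i < 4; i++) // extractelement, one per lane
    Lane[i] = V[i];
  // ... safepoint here: every lane is an independently relocated scalar ...
  PtrVec Out = {};
  for (int i = 0; i < 4; i++) // insertelement to reform the vector
    Out[i] = relocate(Lane[i]);
  return Out;
}

int main() {
  int Objs[4];
  PtrVec V = {&Objs[0], &Objs[1], &Objs[2], &Objs[3]};
  return roundTripThroughSafepoint(V) == V ? 0 : 1;
}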
+static void splitVectorValues(Instruction *StatepointInst, + StatepointLiveSetTy &LiveSet, DominatorTree &DT) { + SmallVector<Value *, 16> ToSplit; + for (Value *V : LiveSet) + if (isa<VectorType>(V->getType())) + ToSplit.push_back(V); + + if (ToSplit.empty()) + return; + + Function &F = *(StatepointInst->getParent()->getParent()); + + DenseMap<Value *, AllocaInst *> AllocaMap; + // First is normal return, second is exceptional return (invoke only) + DenseMap<Value *, std::pair<Value *, Value *>> Replacements; + for (Value *V : ToSplit) { + LiveSet.erase(V); + + AllocaInst *Alloca = + new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); + AllocaMap[V] = Alloca; + + VectorType *VT = cast<VectorType>(V->getType()); + IRBuilder<> Builder(StatepointInst); + SmallVector<Value *, 16> Elements; + for (unsigned i = 0; i < VT->getNumElements(); i++) + Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); + LiveSet.insert(Elements.begin(), Elements.end()); + + auto InsertVectorReform = [&](Instruction *IP) { + Builder.SetInsertPoint(IP); + Builder.SetCurrentDebugLocation(IP->getDebugLoc()); + Value *ResultVec = UndefValue::get(VT); + for (unsigned i = 0; i < VT->getNumElements(); i++) + ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], + Builder.getInt32(i)); + return ResultVec; + }; + + if (isa<CallInst>(StatepointInst)) { + BasicBlock::iterator Next(StatepointInst); + Next++; + Instruction *IP = &*(Next); + Replacements[V].first = InsertVectorReform(IP); + Replacements[V].second = nullptr; + } else { + InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); + // We've already normalized - check that we don't have shared destination + // blocks + BasicBlock *NormalDest = Invoke->getNormalDest(); + assert(!isa<PHINode>(NormalDest->begin())); + BasicBlock *UnwindDest = Invoke->getUnwindDest(); + assert(!isa<PHINode>(UnwindDest->begin())); + // Insert insert element sequences in both successors + Instruction *IP = &*(NormalDest->getFirstInsertionPt()); + Replacements[V].first = InsertVectorReform(IP); + IP = &*(UnwindDest->getFirstInsertionPt()); + Replacements[V].second = InsertVectorReform(IP); } } + for (Value *V : ToSplit) { + AllocaInst *Alloca = AllocaMap[V]; + + // Capture all users before we start mutating use lists + SmallVector<Instruction *, 16> Users; + for (User *U : V->users()) + Users.push_back(cast<Instruction>(U)); + + for (Instruction *I : Users) { + if (auto Phi = dyn_cast<PHINode>(I)) { + for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) + if (V == Phi->getIncomingValue(i)) { + LoadInst *Load = new LoadInst( + Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); + Phi->setIncomingValue(i, Load); + } + } else { + LoadInst *Load = new LoadInst(Alloca, "", I); + I->replaceUsesOfWith(V, Load); + } + } - // Note that we want these at the end of the list, otherwise - // register placement gets screwed up once we lower to STATEPOINT - // instructions. This is an utter hack, but there doesn't seem to be a - // better one. 
- for (Value *base : missing) { - assert(base); - liveset.insert(base); - PointerToBase[base] = base; - } - assert(liveset.size() == PointerToBase.size()); + // Store the original value and the replacement value into the alloca + StoreInst *Store = new StoreInst(V, Alloca); + if (auto I = dyn_cast<Instruction>(V)) + Store->insertAfter(I); + else + Store->insertAfter(Alloca); + + // Normal return for invoke, or call return + Instruction *Replacement = cast<Instruction>(Replacements[V].first); + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + // Unwind return for invoke only + Replacement = cast_or_null<Instruction>(Replacements[V].second); + if (Replacement) + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + } + + // apply mem2reg to promote alloca to SSA + SmallVector<AllocaInst *, 16> Allocas; + for (Value *V : ToSplit) + Allocas.push_back(AllocaMap[V]); + PromoteMemToReg(Allocas, DT); } static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, @@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } #endif + // When inserting gc.relocates for invokes, we need to be able to insert at + // the top of the successor blocks. See the comment on + // normalForInvokeSafepoint on exactly what is needed. Note that this step + // may restructure the CFG. + for (CallSite CS : toUpdate) { + if (!CS.isInvoke()) + continue; + InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); + normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), + P); + normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), + P); + } + // A list of dummy calls added to the IR to keep various values obviously // live in the IR. We'll remove all of these when done. SmallVector<CallInst *, 64> holders; @@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, SmallVector<Value *, 64> DeoptValues; for (Use &U : StatepointCS.vm_state_args()) { Value *Arg = cast<Value>(&U); - if (isGCPointerType(Arg->getType())) + assert(!isUnhandledGCPointerType(Arg->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } insertUseHolderAfter(CS, DeoptValues, holders); @@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // site. findLiveReferences(F, DT, P, toUpdate, records); + // Do a limited scalarization of any live at safepoint vector values which + // contain pointers. This enables this pass to run after vectorization at + // the cost of some possible performance loss. TODO: it would be nice to + // natively support vectors all the way through the backend so we don't need + // to scalarize here. 
+ for (size_t i = 0; i < records.size(); i++) { + struct PartiallyConstructedSafepointRecord &info = records[i]; + Instruction *statepoint = toUpdate[i].getInstruction(); + splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); + } + // B) Find the base pointers for each live pointer /* scope for caching */ { // Cache the 'defining value' relation used in the computation and @@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // gep a + 1 // safepoint 2 // br loop - DenseSet<llvm::Value *> allInsertedDefs; - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - allInsertedDefs.insert(info.NewInsertedDefs.begin(), - info.NewInsertedDefs.end()); - } - // We insert some dummy calls after each safepoint to definitely hold live // the base pointers which were identified for that safepoint. We'll then // ask liveness for _every_ base inserted to see what is now live. Then we @@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, insertUseHolderAfter(CS, Bases, holders); } - // Add the bases explicitly to the live vector set. This may result in a few - // extra relocations, but the base has to be available whenever a pointer - // derived from it is used. Thus, we need it to be part of the statepoint's - // gc arguments list. TODO: Introduce an explicit notion (in the following - // code) of the GC argument list as seperate from the live Values at a - // given statepoint. - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - addBasesAsLiveValues(info.liveset, info.PointerToBase); - } + // By selecting base pointers, we've effectively inserted new uses. Thus, we + // need to rerun liveness. We may *also* have inserted new defs, but that's + // not the key issue. + recomputeLiveInValues(F, DT, P, toUpdate, records); - // If we inserted any new values, we need to adjust our notion of what is - // live at a particular safepoint. - if (!allInsertedDefs.empty()) { - fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records); - } if (PrintBasePointers) { for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; @@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } toUpdate.clear(); // prevent accident use of invalid CallSites - // In case if we inserted relocates in a different basic block than the - // original safepoint (this can happen for invokes). We need to be sure that - // original values were not used in any of the phi nodes at the - // beginning of basic block containing them. Because we know that all such - // blocks will have single predecessor we can safely assume that all phi - // nodes have single entry (because of normalizeBBForInvokeSafepoint). - // Just remove them all here. 
- for (size_t i = 0; i < records.size(); i++) { - Instruction *I = records[i].StatepointToken; - - if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { - FoldSingleEntryPHINodes(invoke->getNormalDest()); - assert(!isa<PHINode>(invoke->getNormalDest()->begin())); - - FoldSingleEntryPHINodes(invoke->getUnwindDest()); - assert(!isa<PHINode>(invoke->getUnwindDest()->begin())); - } - } - // Do all the fixups of the original live variables to their relocated selves SmallVector<Value *, 128> live; for (size_t i = 0; i < records.size(); i++) { @@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, Statepoint statepoint(info.StatepointToken); live.insert(live.end(), statepoint.gc_args_begin(), statepoint.gc_args_end()); +#ifndef NDEBUG + // Do some basic sanity checks on our liveness results before performing + // relocation. Relocation can and will turn mistakes in liveness results + // into non-sensical code which is must harder to debug. + // TODO: It would be nice to test consistency as well + assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) && + "statepoint must be reachable or liveness is meaningless"); + for (Value *V : statepoint.gc_args()) { + if (!isa<Instruction>(V)) + // Non-instruction values trivial dominate all possible uses + continue; + auto LiveInst = cast<Instruction>(V); + assert(DT.isReachableFromEntry(LiveInst->getParent()) && + "unreachable values should never be live"); + assert(DT.dominates(LiveInst, info.StatepointToken) && + "basic SSA liveness expectation violated by liveness analysis"); + } +#endif } unique_unsorted(live); @@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { if (!shouldRewriteStatepointsIn(F)) return false; - // Gather all the statepoints which need rewritten. + DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + + // Gather all the statepoints which need rewritten. Be careful to only + // consider those in reachable code since we need to ask dominance queries + // when rewriting. We'll delete the unreachable ones in a moment. SmallVector<CallSite, 64> ParsePointNeeded; + bool HasUnreachableStatepoint = false; for (Instruction &I : inst_range(F)) { // TODO: only the ones with the flag set! - if (isStatepoint(I)) - ParsePointNeeded.push_back(CallSite(&I)); + if (isStatepoint(I)) { + if (DT.isReachableFromEntry(I.getParent())) + ParsePointNeeded.push_back(CallSite(&I)); + else + HasUnreachableStatepoint = true; + } } + bool MadeChange = false; + + // Delete any unreachable statepoints so that we don't have unrewritten + // statepoints surviving this pass. This makes testing easier and the + // resulting IR less confusing to human readers. Rather than be fancy, we + // just reuse a utility function which removes the unreachable blocks. + if (HasUnreachableStatepoint) + MadeChange |= removeUnreachableBlocks(F); + // Return early if no work to do. if (ParsePointNeeded.empty()) - return false; + return MadeChange; + + // As a prepass, go ahead and aggressively destroy single entry phi nodes. + // These are created by LCSSA. They have the effect of increasing the size + // of liveness sets for no good reason. It may be harder to do this post + // insertion since relocations and base phis can confuse things. 
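For illustration, the effect of FoldSingleEntryPHINodes used by the prepass loop below: a single-entry phi computes nothing new, so every use is redirected to its sole incoming value and the phi dies. A sketch in a toy use-list model (hypothetical types, not LLVM's):

#include <cassert>
#include <vector>

struct SsaValue {
  std::vector<SsaValue **> Uses; // operand slots in other instructions
};

// Rewrite all uses of Phi to its only incoming value; afterwards the phi
// has no uses and can be erased.
static void foldSingleEntryPhi(SsaValue &Phi, SsaValue &OnlyIncoming) {
  for (SsaValue **Slot : Phi.Uses) {
    *Slot = &OnlyIncoming;
    OnlyIncoming.Uses.push_back(Slot);
  }
  Phi.Uses.clear();
}

int main() {
  SsaValue X, Phi;
  SsaValue *UserOperand = &Phi; // some instruction's operand names the phi
  Phi.Uses.push_back(&UserOperand);
  foldSingleEntryPhi(Phi, X);
  assert(UserOperand == &X); // the use now names X directly
}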
+ for (BasicBlock &BB : F) + if (BB.getUniquePredecessor()) { + MadeChange = true; + FoldSingleEntryPHINodes(&BB); + } - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - return insertParsePoints(F, DT, this, ParsePointNeeded); + MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); + return MadeChange; +} + +// liveness computation via standard dataflow +// ------------------------------------------------------------------- + +// TODO: Consider using bitvectors for liveness, the set of potentially +// interesting values should be small and easy to pre-compute. + +/// Is this value a constant consisting of entirely null values? +static bool isConstantNull(Value *V) { + return isa<Constant>(V) && cast<Constant>(V)->isNullValue(); +} + +/// Compute the live-in set for the location rbegin starting from +/// the live-out set of the basic block +static void computeLiveInValues(BasicBlock::reverse_iterator rbegin, + BasicBlock::reverse_iterator rend, + DenseSet<Value *> &LiveTmp) { + + for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) { + Instruction *I = &*ritr; + + // KILL/Def - Remove this definition from LiveIn + LiveTmp.erase(I); + + // Don't consider *uses* in PHI nodes, we handle their contribution to + // predecessor blocks when we seed the LiveOut sets + if (isa<PHINode>(I)) + continue; + + // USE - Add to the LiveIn set for this instruction + for (Value *V : I->operands()) { + assert(!isUnhandledGCPointerType(V->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) && + !isa<UndefValue>(V)) { + // The choice to exclude null and undef is arbitrary here. Reconsider? + LiveTmp.insert(V); + } + } + } +} + +static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) { + + for (BasicBlock *Succ : successors(BB)) { + const BasicBlock::iterator E(Succ->getFirstNonPHI()); + for (BasicBlock::iterator I = Succ->begin(); I != E; I++) { + PHINode *Phi = cast<PHINode>(&*I); + Value *V = Phi->getIncomingValueForBlock(BB); + assert(!isUnhandledGCPointerType(V->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) && + !isa<UndefValue>(V)) { + // The choice to exclude null and undef is arbitrary here. Reconsider? + LiveTmp.insert(V); + } + } + } +} + +static DenseSet<Value *> computeKillSet(BasicBlock *BB) { + DenseSet<Value *> KillSet; + for (Instruction &I : *BB) + if (isHandledGCPointerType(I.getType())) + KillSet.insert(&I); + return KillSet; +} + +#ifndef NDEBUG +/// Check that the items in 'Live' dominate 'TI'. This is used as a basic +/// sanity check for the liveness computation. +static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live, + TerminatorInst *TI, bool TermOkay = false) { + for (Value *V : Live) { + if (auto *I = dyn_cast<Instruction>(V)) { + // The terminator can be a member of the LiveOut set. LLVM's definition + // of instruction dominance states that V does not dominate itself. As + // such, we need to special case this to allow it. + if (TermOkay && TI == I) + continue; + assert(DT.dominates(I, TI) && + "basic SSA liveness expectation violated by liveness analysis"); + } + } +} + +/// Check that all the liveness sets used during the computation of liveness +/// obey basic SSA properties. This is useful for finding cases where we miss +/// a def. 
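Before the remaining liveness routines below, a note on the algorithm: the seeding and propagation here are the textbook backward dataflow, LiveOut(B) = union of LiveIn over B's successors, and LiveIn(B) = Use(B) union (LiveOut(B) minus Def(B)), iterated to a fixed point. A self-contained miniature over integer value ids (a hypothetical toy CFG, not the pass's types):

#include <cassert>
#include <set>
#include <vector>

struct Block {
  std::set<int> Use, Def;        // gc-pointer uses/defs within the block
  std::vector<int> Succs;        // successor block indices
  std::set<int> LiveIn, LiveOut;
};

static void computeLiveness(std::vector<Block> &CFG) {
  bool Changed = true;
  while (Changed) { // iterate to a fixed point
    Changed = false;
    for (int b = (int)CFG.size() - 1; b >= 0; b--) {
      Block &B = CFG[(size_t)b];
      std::set<int> Out;
      for (int S : B.Succs) // LiveOut = union of successors' LiveIn
        Out.insert(CFG[(size_t)S].LiveIn.begin(),
                   CFG[(size_t)S].LiveIn.end());
      std::set<int> In = B.Use; // LiveIn = Use u (LiveOut \ Def)
      for (int V : Out)
        if (!B.Def.count(V))
          In.insert(V);
      if (In != B.LiveIn || Out != B.LiveOut) {
        B.LiveIn = std::move(In);
        B.LiveOut = std::move(Out);
        Changed = true;
      }
    }
  }
}

int main() {
  // Value 0 is defined in block 0 and used in block 2, so it must be live
  // through block 1 as well.
  std::vector<Block> CFG(3);
  CFG[0].Def = {0}; CFG[0].Succs = {1};
  CFG[1].Succs = {2};
  CFG[2].Use = {0};
  computeLiveness(CFG);
  assert(CFG[1].LiveIn.count(0) && CFG[0].LiveOut.count(0));
}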
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+                          BasicBlock &BB) {
+  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+                                GCPtrLivenessData &Data) {
+
+  SmallSetVector<BasicBlock *, 200> Worklist;
+  auto AddPredsToWorklist = [&](BasicBlock *BB) {
+    // We use a SetVector so that we don't have duplicates in the worklist.
+    Worklist.insert(pred_begin(BB), pred_end(BB));
+  };
+  auto NextItem = [&]() {
+    BasicBlock *BB = Worklist.back();
+    Worklist.pop_back();
+    return BB;
+  };
+
+  // Seed the liveness for each individual block
+  for (BasicBlock &BB : F) {
+    Data.KillSet[&BB] = computeKillSet(&BB);
+    Data.LiveSet[&BB].clear();
+    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+    for (Value *Kill : Data.KillSet[&BB])
+      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+    Data.LiveOut[&BB] = DenseSet<Value *>();
+    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+    Data.LiveIn[&BB] = Data.LiveSet[&BB];
+    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+    if (!Data.LiveIn[&BB].empty())
+      AddPredsToWorklist(&BB);
+  }
+
+  // Propagate that liveness until stable
+  while (!Worklist.empty()) {
+    BasicBlock *BB = NextItem();
+
+    // Compute our new liveout set, then exit early if it hasn't changed
+    // despite the contribution of our successors.
+    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+    const auto OldLiveOutSize = LiveOut.size();
+    for (BasicBlock *Succ : successors(BB)) {
+      assert(Data.LiveIn.count(Succ));
+      set_union(LiveOut, Data.LiveIn[Succ]);
+    }
+    // assert OldLiveOut is a subset of LiveOut
+    if (OldLiveOutSize == LiveOut.size()) {
+      // If the sets are the same size, then we didn't actually add anything
+      // when unioning our successors' LiveIn. Thus, the LiveIn of this block
+      // hasn't changed.
+      continue;
+    }
+    Data.LiveOut[BB] = LiveOut;
+
+    // Apply the effects of this basic block
+    DenseSet<Value *> LiveTmp = LiveOut;
+    set_union(LiveTmp, Data.LiveSet[BB]);
+    set_subtract(LiveTmp, Data.KillSet[BB]);
+
+    assert(Data.LiveIn.count(BB));
+    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+    // assert: OldLiveIn is a subset of LiveTmp
+    if (OldLiveIn.size() != LiveTmp.size()) {
+      Data.LiveIn[BB] = LiveTmp;
+      AddPredsToWorklist(BB);
+    }
+  } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+  // Sanity check our output against SSA properties. This helps catch any
+  // missing kills during the above iteration.
+  for (BasicBlock &BB : F) {
+    checkBasicSSA(DT, Data, BB);
+  }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+                              StatepointLiveSetTy &Out) {
+
+  BasicBlock *BB = Inst->getParent();
+
+  // Note: The copy is intentional and required
+  assert(Data.LiveOut.count(BB));
+  DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+  // We want to handle the statepoint itself oddly. Its
+  // call result is not live (normal), nor are its arguments
+  // (unless they're used again later). 
This adjustment is + specifically what we need to relocate + BasicBlock::reverse_iterator rend(Inst); + computeLiveInValues(BB->rbegin(), rend, LiveOut); + LiveOut.erase(Inst); + Out.insert(LiveOut.begin(), LiveOut.end()); +} + +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, + const CallSite &CS, + PartiallyConstructedSafepointRecord &Info) { + Instruction *Inst = CS.getInstruction(); + StatepointLiveSetTy Updated; + findLiveSetAtInst(Inst, RevisedLivenessData, Updated); + +#ifndef NDEBUG + DenseSet<Value *> Bases; + for (auto KVPair : Info.PointerToBase) { + Bases.insert(KVPair.second); + } +#endif + // We may have base pointers which are now live that weren't before. We need + // to update the PointerToBase structure to reflect this. + for (auto V : Updated) + if (!Info.PointerToBase.count(V)) { + assert(Bases.count(V) && "can't find base for unexpected live value"); + Info.PointerToBase[V] = V; + continue; + } + +#ifndef NDEBUG + for (auto V : Updated) { + assert(Info.PointerToBase.count(V) && + "must be able to find base for live value"); + } +#endif + + // Remove any stale base mappings - this can happen since our liveness is + // more precise than the one inherent in the base pointer analysis + DenseSet<Value *> ToErase; + for (auto KVPair : Info.PointerToBase) + if (!Updated.count(KVPair.first)) + ToErase.insert(KVPair.first); + for (auto V : ToErase) + Info.PointerToBase.erase(V); + +#ifndef NDEBUG + for (auto KVPair : Info.PointerToBase) + assert(Updated.count(KVPair.first) && "record for non-live value"); +#endif + + Info.liveset = Updated; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 875a007..bc068f7 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); - markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices)); + markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, + Indices)); } void SCCPSolver::visitStoreInst(StoreInst &SI) { diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 06b000f..59dc528 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1166,10 +1166,9 @@ public: } else { continue; } - Instruction *DbgVal = - DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), - DIExpression(DVI->getExpression()), Inst); - DbgVal->setDebugLoc(DVI->getDebugLoc()); + DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), + DIExpression(DVI->getExpression()), + DVI->getDebugLoc(), Inst); } } }; @@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx"); + return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices, + NamePrefix + "sroa_idx"); } /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, OffsetPtr = Int8PtrOffset == 0 ?
Int8Ptr - : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), + : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, + IRB.getInt(Int8PtrOffset), NamePrefix + "sroa_raw_idx"); } Ptr = OffsetPtr; @@ -3250,7 +3251,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *GEP = + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3283,7 +3285,7 @@ private: // Extract the single value and store it using the indices. Value *Store = IRB.CreateStore( IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), - IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep")); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); } @@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Create a piece expression describing the new partition or reuse AI's // expression if there is only one partition. DIExpression PieceExpr = Expr; - if (IsSplit || Expr.isBitPiece()) { + if (IsSplit || Expr->isBitPiece()) { // If this alloca is already a scalar replacement of a larger aggregate, // Piece.Offset describes the offset inside the scalar. - uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0; + uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0; uint64_t Start = Offset + Piece.Offset; uint64_t Size = Piece.Size; - if (Expr.isBitPiece()) { - uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize(); + if (Expr->isBitPiece()) { + uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize(); if (Start >= AbsEnd) // No need to describe a SROAed padding. continue; @@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca)) OldDDI->eraseFromParent(); - auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI); - NewDDI->setDebugLoc(DbgDecl->getDebugLoc()); + DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(), + &AI); } } return Changed; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 3e7cf04..f99fe3f 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) { /// \returns The profiled weight of I. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) { DebugLoc DLoc = Inst.getDebugLoc(); - if (DLoc.isUnknown()) + if (!DLoc) return 0; unsigned Lineno = DLoc.getLine(); if (Lineno < HeaderLineno) return 0; - DILocation DIL(DLoc.getAsMDNode(*Ctx)); + DILocation DIL = DLoc.get(); int LOffset = Lineno - HeaderLineno; - unsigned Discriminator = DIL.getDiscriminator(); + unsigned Discriminator = DIL->getDiscriminator(); unsigned Weight = Samples->samplesAt(LOffset, Discriminator); DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst << " (line offset: " << LOffset << "." << Discriminator @@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { /// \returns the line number where \p F is defined. 
If it returns 0, /// it means that there is no debug information available for \p F. unsigned SampleProfileLoader::getFunctionLoc(Function &F) { - DISubprogram S = getDISubprogram(&F); - if (S.isSubprogram()) - return S.getLineNumber(); + if (MDSubprogram *S = getDISubprogram(&F)) + return S->getLine(); // If could not find the start of \p F, emit a diagnostic to inform the user // about the missed opportunity. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 6cc8411..42095ae 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); initializeMergedLoadStoreMotionPass(Registry); + initializeNaryReassociatePass(Registry); initializePartiallyInlineLibCallsPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); @@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); + initializeFloat2IntPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index acd8585..693c5ae 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1117,10 +1117,9 @@ public: } else { continue; } - Instruction *DbgVal = DIB->insertDbgValueIntrinsic( - Arg, 0, DIVariable(DVI->getVariable()), - DIExpression(DVI->getExpression()), Inst); - DbgVal->setDebugLoc(DVI->getDebugLoc()); + DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), + DIExpression(DVI->getExpression()), + DVI->getDebugLoc(), Inst); } } }; @@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, // split the alloca again later. unsigned AS = AI->getType()->getAddressSpace(); Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); - V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index a457cba..d55dc6a 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) { CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0"); } if (I != 0) - CV[I] = Builder.CreateConstGEP1_32(CV[0], I, + CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I, V->getName() + ".i" + Twine(I)); } else { // Search through a chain of InsertElementInsts looking for element I. diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 1a04d74..8af4753 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( } } // Create an ugly GEP with a single index for each index. - ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); } } // Create a GEP with the constant offset index. 
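// Worked example (field offset assumed, for illustration): when lowering &p[i].f with f at byte offset 4, the loop above emits one i8 "uglygep" per variadic index, and this point is reached with AccumulativeByteOffset == 4, which becomes one final i8 GEP.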
if (AccumulativeByteOffset != 0) { Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); - ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index e71031c..2fc9368 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -15,42 +15,46 @@ // // There are many optimizations we can perform in the domain of SLSR. This file // for now contains only an initial step. Specifically, we look for strength -// reduction candidates in two forms: +// reduction candidates in the following forms: // -// Form 1: (B + i) * S -// Form 2: &B[i * S] +// Form 1: B + i * S +// Form 2: (B + i) * S +// Form 3: &B[i * S] // // where S is an integer variable, and i is a constant integer. If we found two -// candidates +// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2 +// in a simpler way with respect to S1. For example, // -// S1: X = (B + i) * S -// S2: Y = (B + i') * S +// S1: X = B + i * S +// S2: Y = B + i' * S => X + (i' - i) * S // -// or +// S1: X = (B + i) * S +// S2: Y = (B + i') * S => X + (i' - i) * S // // S1: X = &B[i * S] -// S2: Y = &B[i' * S] -// -// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with +// S2: Y = &B[i' * S] => &X[(i' - i) * S] // -// Y = X + (i' - i) * S +// Note: (i' - i) * S is folded to the extent possible. // -// or +// This rewriting is in general a good idea. The code patterns we focus on +// usually come from loop unrolling, so (i' - i) * S is likely the same +// across iterations and can be reused. When that happens, the optimized form +// takes only one add starting from the second iteration. // -// Y = &X[(i' - i) * S] -// -// where (i' - i) * S is folded to the extent possible. When S2 has multiple -// bases, we pick the one that is closest to S2, or S2's "immediate" basis. +// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has +// multiple bases, we choose to rewrite S2 with respect to its "immediate" +// basis, the basis that is the closest ancestor in the dominator tree. // // TODO: // -// - Handle candidates in the form of B + i * S -// // - Floating point arithmetics when fast math is enabled. // // - SLSR may decrease ILP at the architecture level. Targets that are very // sensitive to ILP may want to disable it. Having SLSR to consider ILP is // left as future work. +// +// - When (i' - i) is constant but i and i' are not, we could still perform +// SLSR. #include <vector> #include "llvm/ADT/DenseSet.h" @@ -72,13 +76,12 @@ namespace { class StraightLineStrengthReduce : public FunctionPass { public: - // SLSR candidate. Such a candidate must be in the form of - // (Base + Index) * Stride - // or - // Base[..][Index * Stride][..] + // SLSR candidate. Such a candidate must be in one of the forms described in + // the header comments. struct Candidate : public ilist_node<Candidate> { enum Kind { Invalid, // reserved for the default constructor + Add, // B + i * S Mul, // (B + i) * S GEP, // &B[..][i * S][..] }; @@ -92,14 +95,14 @@ public: Basis(nullptr) {} Kind CandidateKind; const SCEV *Base; - // Note that Index and Stride of a GEP candidate may not have the same - // integer type. 
In that case, during rewriting, Stride will be + // Note that Index and Stride of a GEP candidate do not necessarily have the + // same integer type. In that case, during rewriting, Stride will be // sign-extended or truncated to Index's type. ConstantInt *Index; Value *Stride; // The instruction this candidate corresponds to. It helps us to rewrite a // candidate with respect to its immediate basis. Note that one instruction - // can corresponds to multiple candidates depending on how you associate the + // can correspond to multiple candidates depending on how you associate the // expression. For instance, // // (a + 1) * (b + 2) @@ -143,31 +146,43 @@ private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they // share the same base and stride. bool isBasisFor(const Candidate &Basis, const Candidate &C); + // Returns whether the candidate can be folded into an addressing mode. + bool isFoldable(const Candidate &C, TargetTransformInfo *TTI, + const DataLayout *DL); + // Returns true if C is already in a simplest form and not worth being + // rewritten. + bool isSimplestForm(const Candidate &C); // Checks whether I is in a candidate form. If so, adds all the matching forms // to Candidates, and tries to find the immediate basis for each of them. - void allocateCandidateAndFindBasis(Instruction *I); + void allocateCandidatesAndFindBasis(Instruction *I); + // Allocate candidates and find bases for Add instructions. + void allocateCandidatesAndFindBasisForAdd(Instruction *I); + // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a + // candidate. + void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for Mul instructions. - void allocateCandidateAndFindBasisForMul(Instruction *I); + void allocateCandidatesAndFindBasisForMul(Instruction *I); // Splits LHS into Base + Index and, if succeeds, calls - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for GetElementPtr instructions. - void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); + void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP); // A helper function that scales Idx with ElementSize before invoking - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, - Value *S, uint64_t ElementSize, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, + Value *S, uint64_t ElementSize, + Instruction *I); // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate // basis. - void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, - ConstantInt *Idx, Value *S, - Instruction *I); + void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B, + ConstantInt *Idx, Value *S, + Instruction *I); // Rewrites candidate C with respect to Basis. void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); // A helper function that factors ArrayIdx to a product of a stride and a - // constant index, and invokes allocateCandidateAndFindBasis with the + // constant index, and invokes allocateCandidatesAndFindBasis with the // factorings. 
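// Worked example (names assumed, for illustration): for a GEP index "%i = shl nsw i64 %s, 1", factorArrayIndex treats %i as %s * 2 and invokes allocateCandidatesAndFindBasisForGEP with Stride = %s and Idx = 2.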
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP); @@ -187,7 +202,7 @@ private: // Temporarily holds all instructions that are unlinked (but not deleted) by // rewriteCandidateWithBasis. These instructions will be actually removed // after all rewriting finishes. - DenseSet<Instruction *> UnlinkedInstructions; + std::vector<Instruction *> UnlinkedInstructions; }; } // anonymous namespace @@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, Basis.CandidateKind == C.CandidateKind); } -static bool isCompletelyFoldable(GetElementPtrInst *GEP, - const TargetTransformInfo *TTI, - const DataLayout *DL) { +static bool isGEPFoldable(GetElementPtrInst *GEP, + const TargetTransformInfo *TTI, + const DataLayout *DL) { GlobalVariable *BaseGV = nullptr; int64_t BaseOffset = 0; bool HasBaseReg = false; @@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP, BaseOffset, HasBaseReg, Scale); } -// TODO: We currently implement an algorithm whose time complexity is linear to -// the number of existing candidates. However, a better algorithm exists. We -// could depth-first search the dominator tree, and maintain a hash table that -// contains all candidates that dominate the node being traversed. This hash -// table is indexed by the base and the stride of a candidate. Therefore, -// finding the immediate basis of a candidate boils down to one hash-table look -// up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis( - Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, - Instruction *I) { - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If &B[Idx * S] fits into an addressing mode, do not turn it into - // non-free computation. - if (isCompletelyFoldable(GEP, TTI, DL)) - return; +// Returns whether (Base + Index * Stride) can be folded to an addressing mode. +static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, + TargetTransformInfo *TTI) { + return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true, + Index->getSExtValue()); +} + +bool StraightLineStrengthReduce::isFoldable(const Candidate &C, + TargetTransformInfo *TTI, + const DataLayout *DL) { + if (C.CandidateKind == Candidate::Add) + return isAddFoldable(C.Base, C.Index, C.Stride, TTI); + if (C.CandidateKind == Candidate::GEP) + return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL); + return false; +} + +// Returns true if GEP has zero or one non-zero index. +static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) { + unsigned NumNonZeroIndices = 0; + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { + ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); + if (ConstIdx == nullptr || !ConstIdx->isZero()) + ++NumNonZeroIndices; + } + return NumNonZeroIndices <= 1; +} + +bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) { + if (C.CandidateKind == Candidate::Add) { + // B + 1 * S or B + (-1) * S + return C.Index->isOne() || C.Index->isMinusOne(); + } + if (C.CandidateKind == Candidate::Mul) { + // (B + 0) * S + return C.Index->isZero(); + } + if (C.CandidateKind == Candidate::GEP) { + // (char*)B + S or (char*)B - S + return ((C.Index->isOne() || C.Index->isMinusOne()) && + hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins))); } + return false; +} +// TODO: We currently implement an algorithm whose time complexity is linear in +// the number of existing candidates. However, we could do better by using +// ScopedHashTable. 
Specifically, while traversing the dominator tree, we could +maintain all the candidates that dominate the basic block being traversed in +a ScopedHashTable. This hash table is indexed by the base and the stride of +a candidate. Therefore, finding the immediate basis of a candidate boils down +to one hash-table lookup. +void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( + Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, + Instruction *I) { Candidate C(CT, B, Idx, S, I); - // Try to compute the immediate basis of C. - unsigned NumIterations = 0; - // Limit the scan radius to avoid running forever. - static const unsigned MaxNumIterations = 50; - for (auto Basis = Candidates.rbegin(); - Basis != Candidates.rend() && NumIterations < MaxNumIterations; - ++Basis, ++NumIterations) { - if (isBasisFor(*Basis, C)) { - C.Basis = &(*Basis); - break; + // SLSR can complicate an instruction in two cases: + // + // 1. If we can fold I into an addressing mode, computing I is likely free or + // takes only one instruction. + // + // 2. I is already in a simplest form. For example, when + // X = B + 8 * S + // Y = B + S, + // rewriting Y to X - 7 * S is probably a bad idea. + // + // In the above cases, we still add I to the candidate list so that I can be + // the basis of other candidates, but we leave I's basis blank so that I + // won't be rewritten. + if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) { + // Try to compute the immediate basis of C. + unsigned NumIterations = 0; + // Limit the scan radius to avoid running in quadratic time. + static const unsigned MaxNumIterations = 50; + for (auto Basis = Candidates.rbegin(); + Basis != Candidates.rend() && NumIterations < MaxNumIterations; + ++Basis, ++NumIterations) { + if (isBasisFor(*Basis, C)) { + C.Basis = &(*Basis); + break; + } } } // Regardless of whether we find a basis for C, we need to push C to the - // candidate list. + // candidate list so that it can be the basis of other candidates. Candidates.push_back(C); } -void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { +void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( + Instruction *I) { switch (I->getOpcode()) { + case Instruction::Add: + allocateCandidatesAndFindBasisForAdd(I); + break; case Instruction::Mul: - allocateCandidateAndFindBasisForMul(I); + allocateCandidatesAndFindBasisForMul(I); break; case Instruction::GetElementPtr: - allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I)); + allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I)); break; } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( + Instruction *I) { + // Try matching B + i * S.
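// Worked example (values assumed, for illustration): "%y = add i64 %a, %t" with "%t = mul i64 %b, 8" yields the candidate B = %a, i = 8, S = %b, i.e. %y = %a + 8 * %b.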
+ if (!isa<IntegerType>(I->getType())) + return; + + assert(I->getNumOperands() == 2 && "isn't I an add?"); + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + allocateCandidatesAndFindBasisForAdd(LHS, RHS, I); + if (LHS != RHS) + allocateCandidatesAndFindBasisForAdd(RHS, LHS, I); +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( + Value *LHS, Value *RHS, Instruction *I) { + Value *S = nullptr; + ConstantInt *Idx = nullptr; + if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + Idx * S + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx) + APInt One(Idx->getBitWidth(), 1); + Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue()); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else { + // At least, I = LHS + 1 * RHS + ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS, + I); + } +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Value *LHS, Value *RHS, Instruction *I) { Value *B = nullptr; ConstantInt *Idx = nullptr; @@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { // If LHS is in the form of "Base + Index", then I is in the form of // "(Base + Index) * RHS". - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); } else { // Otherwise, at least try the form (LHS + 0) * RHS. ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Instruction *I) { // Try matching (B + i) * S. // TODO: we could extend SLSR to float and vector types. if (!isa<IntegerType>(I->getType())) return; + assert(I->getNumOperands() == 2 && "isn't I a mul?"); Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); - allocateCandidateAndFindBasisForMul(LHS, RHS, I); + allocateCandidatesAndFindBasisForMul(LHS, RHS, I); if (LHS != RHS) { // Symmetrically, try to split RHS to Base + Index. - allocateCandidateAndFindBasisForMul(RHS, LHS, I); + allocateCandidatesAndFindBasisForMul(RHS, LHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, Instruction *I) { - // I = B + sext(Idx *nsw S) *nsw ElementSize + // I = B + sext(Idx *nsw S) * ElementSize + // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. 
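// Worked example (values assumed, for illustration): Idx = 3 with ElementSize = 8 produces ScaledIdx = 24 below, so the recorded candidate is I = B + 24 * sext(S).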
IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); ConstantInt *ScaledIdx = ConstantInt::get( IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); - allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); + allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); } void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP) { - // At least, ArrayIdx = ArrayIdx *s 1. - allocateCandidateAndFindBasisForGEP( + // At least, ArrayIdx = ArrayIdx *nsw 1. + allocateCandidatesAndFindBasisForGEP( Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1), ArrayIdx, ElementSize, GEP); Value *LHS = nullptr; ConstantInt *RHS = nullptr; - // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4). - // // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx // itself. This would allow us to handle the shl case for free. However, // matching SCEVs has two issues: @@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, // sext'ed multiplication. if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) { // SLSR is currently unsafe if i * S may overflow. - // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize - allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); + // GEP = Base + sext(LHS *nsw RHS) * ElementSize + allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); + } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) { + // GEP = Base + sext(LHS <<nsw RHS) * ElementSize + // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize + APInt One(RHS->getBitWidth(), 1); + ConstantInt *PowerOf2 = + ConstantInt::get(RHS->getContext(), One << RHS->getValue()); + allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( GetElementPtrInst *GEP) { // TODO: handle vector GEPs if (GEP->getType()->isVectorTy()) return; @@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, else BumpWithUglyGEP = true; } + // Compute Bump = C - Basis = (i' - i) * S. // Common case 1: if (i' - i) is 1, Bump = S. if (IndexOffset.getSExtValue() == 1) return C.Stride; // Common case 2: if (i' - i) is -1, Bump = -S. if (IndexOffset.getSExtValue() == -1) return Builder.CreateNeg(C.Stride); - // Otherwise, Bump = (i' - i) * sext/trunc(S). - ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset); - Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType()); + + // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may + // have different bit widths. + IntegerType *DeltaType = + IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth()); + Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType); + if (IndexOffset.isPowerOf2()) { + // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i). + ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2()); + return Builder.CreateShl(ExtendedStride, Exponent); + } + if ((-IndexOffset).isPowerOf2()) { + // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i - i').
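// Worked example (offset assumed, for illustration): IndexOffset = -8 takes this path with an exponent of log2(8) = 3, producing Bump = -(sext/trunc(S) << 3).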
+ ConstantInt *Exponent = + ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); + return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); + } + Constant *Delta = ConstantInt::get(DeltaType, IndexOffset); return Builder.CreateMul(ExtendedStride, Delta); } @@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && C.Stride == Basis.Stride); + // We run rewriteCandidateWithBasis on all candidates in a post-order, so the + // basis of a candidate cannot be unlinked before the candidate. + assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked"); // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as @@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { + case Candidate::Add: case Candidate::Mul: - Reduced = Builder.CreateAdd(Basis.Ins, Bump); + if (BinaryOperator::isNeg(Bump)) { + Reduced = + Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump)); + } else { + Reduced = Builder.CreateAdd(Basis.Ins, Bump); + } break; case Candidate::GEP: { Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); if (BumpWithUglyGEP) { // C = (char *)Basis + Bump unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - // We only considered inbounds GEP as candidates. - Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); + if (InBounds) + Reduced = + Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); + else + Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump); Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); } else { // C = gep Basis, Bump // Canonicalize bump to pointer size. Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); + if (InBounds) + Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump); + else + Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump); } } break; @@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( // Unlink C.Ins so that we can skip other candidates also corresponding to // C.Ins. The actual deletion is postponed to the end of runOnFunction. C.Ins->removeFromParent(); - UnlinkedInstructions.insert(C.Ins); + UnlinkedInstructions.push_back(C.Ins); } bool StraightLineStrengthReduce::runOnFunction(Function &F) { @@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { for (auto &I : *node->getBlock()) - allocateCandidateAndFindBasis(&I); + allocateCandidatesAndFindBasis(&I); } // Rewrite candidates in the reverse depth-first order. This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 6c3ce58..4f23e20 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() { /// no longer dominate all their uses. 
Not sure if this is really nessasary void StructurizeCFG::rebuildSSA() { SSAUpdater Updater; - for (const auto &BB : ParentRegion->blocks()) + for (auto *BB : ParentRegion->blocks()) for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index 820544b..c1cd39a 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *B = I; TerminatorInst *Last = B->getTerminator(); - DebugLoc LastLoc = Last->getDebugLoc(); - if (LastLoc.isUnknown()) continue; - DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + DILocation LastDIL = Last->getDebugLoc().get(); + if (!LastDIL) + continue; for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { BasicBlock *Succ = Last->getSuccessor(I); Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - DebugLoc FirstLoc = First->getDebugLoc(); - if (FirstLoc.isUnknown()) continue; - DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + DILocation FirstDIL = First->getDebugLoc().get(); + if (!FirstDIL) + continue; // If the first instruction (First) of Succ is at the same file // location as B's last instruction (Last), add a new // discriminator for First's location and all the instructions // in Succ that share the same location with First. - if (FirstDIL.atSameLineAs(LastDIL)) { + if (!FirstDIL->canDiscriminate(*LastDIL)) { // Create a new lexical scope and compute a new discriminator // number for it. - StringRef Filename = FirstDIL.getFilename(); - DIScope Scope = FirstDIL.getScope(); - DIFile File = Builder.createFile(Filename, Scope.getDirectory()); - unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); + StringRef Filename = FirstDIL->getFilename(); + auto *Scope = FirstDIL->getScope(); + DIFile File = Builder.createFile(Filename, Scope->getDirectory()); + + // FIXME: Calculate the discriminator here, based on local information, + // and delete MDLocation::computeNewDiscriminator(). The current + // solution gives different results depending on other modules in the + // same context. All we really need is to discriminate between + // FirstDIL and LastDIL -- a local map would suffice. + unsigned Discriminator = FirstDIL->computeNewDiscriminator(); DILexicalBlockFile NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator); - DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); - DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + auto *NewDIL = + MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), + NewScope, FirstDIL->getInlinedAt()); + DebugLoc newDebugLoc = NewDIL; // Attach this new debug location to First and every // instruction following First that shares the same location. 
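// Worked example (line number assumed, for illustration): for "if (c) x = 1; else x = 2;" written entirely on line 7, both stores report line 7; tagging the second one with discriminator 1 lets a sample profile attribute counts to each branch separately.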
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; ++I1) { - if (I1->getDebugLoc() != FirstLoc) break; + if (I1->getDebugLoc().get() != FirstDIL) + break; I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() - << ":" << NewDIL.getColumnNumber() << ":" - << NewDIL.getDiscriminator() << *I1 << "\n"); + DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() + << ":" << NewDIL->getColumn() << ":" + << NewDIL->getDiscriminator() << *I1 << "\n"); } DEBUG(dbgs() << "\n"); Changed = true; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f04ea9c..f200b58 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -157,20 +157,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Find the MDNode which corresponds to the DISubprogram data that described F. static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { for (DISubprogram Subprogram : Finder.subprograms()) { - if (Subprogram.describes(F)) return Subprogram; + if (Subprogram->describes(F)) + return Subprogram; } return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the // back of the operand list. -static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) { +static void AddOperand(DICompileUnit CU, MDSubprogramArray SPs, Metadata *NewSP) { SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs->getNumOperands() + 1); - for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I) - NewSPs.push_back(SPs->getOperand(I)); + NewSPs.reserve(SPs.size() + 1); + for (auto *SP : SPs) + NewSPs.push_back(SP); NewSPs.push_back(NewSP); - CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs))); + CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs)); } // Clone the module-level debug info associated with OldFunc. The cloned data @@ -186,15 +187,15 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, // Ensure that OldFunc appears in the map. // (if it's already there it must point to NewFunc anyway) VMap[OldFunc] = NewFunc; - DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap)); + DISubprogram NewSubprogram = + cast<MDSubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); for (DICompileUnit CU : Finder.compile_units()) { - DIArray Subprograms(CU.getSubprograms()); - + auto Subprograms = CU->getSubprograms(); // If the compile unit's function list contains the old function, it should // also contain the new one. - for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { - if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + for (auto *SP : Subprograms) { + if (SP == OldSubprogramMDNode) { AddOperand(CU, Subprograms, NewSubprogram); break; } @@ -395,7 +396,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Action == CloningDirector::CloneSuccessors) { // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. 
- const TerminatorInst *TI = BB->getTerminator(); + const TerminatorInst *TI = NewBB->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) ToClone.push_back(TI->getSuccessor(i)); return; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 52e2d59..44b7d25 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::Stored; - } else if (ImmutableCallSite C = I) { + } else if (auto C = ImmutableCallSite(I)) { if (!C.isCallee(&U)) return true; GS.IsLoaded = true; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index df3e1d4..a08ffbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -835,11 +835,10 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, DenseMap<const MDLocation *, MDLocation *> &IANodes) { SmallVector<MDLocation*, 3> InlinedAtLocations; MDLocation *Last = InlinedAtNode; - DebugLoc CurInlinedAt = DL; + MDLocation *CurInlinedAt = DL; // Gather all the inlined-at nodes - while (MDLocation *IA = - cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) { + while (MDLocation *IA = CurInlinedAt->getInlinedAt()) { // Skip any we've already built nodes for if (MDLocation *Found = IANodes[IA]) { Last = Found; @@ -847,7 +846,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, } InlinedAtLocations.push_back(IA); - CurInlinedAt = DebugLoc::getFromDILocation(IA); + CurInlinedAt = IA; } // Starting from the top, rebuild the nodes to point to the new inlined-at @@ -862,7 +861,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, // And finally create the normal location for this instruction, referring to // the new inlined-at chain. - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } /// Update inlined instructions' line numbers to @@ -870,11 +869,11 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); - if (TheCallDL.isUnknown()) + if (!TheCallDL) return; auto &Ctx = Fn->getContext(); - auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx)); + MDLocation *InlinedAtNode = TheCallDL; // Create a unique call site, not to be confused with any other call from the // same location. @@ -891,7 +890,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) { + if (!DL) { // If the inlined instruction has no line number, make it look as if it // originates from the call location. 
This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -905,19 +904,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI->setDebugLoc(TheCallDL); } else { BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, MetadataAsValue::get( - Ctx, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx))); - } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DDI->setOperand(1, MetadataAsValue::get( - Ctx, createInlinedVariable(DDI->getVariable(), - InlinedAt, Ctx))); - } } } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bd15f9e..1c9760e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. Argument *ExtendedArg = nullptr; @@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) - DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI); + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); else - DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, - DIExpr, SI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); return true; } @@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, LI)) return true; - Instruction *DbgVal = - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), LI); return true; } @@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. 
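// Worked example (names assumed, for illustration): for a user such as "call void @use(i32* %x.addr)", a "llvm.dbg.value(metadata i32* %x.addr, ...)" is emitted before the call so the variable's location survives even though the declare is removed.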
- auto DbgVal = DIB.insertDbgValueIntrinsic( - AI, 0, DIVariable(DDI->getVariable()), - DIExpression(DDI->getExpression()), CI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DIB.insertDbgValueIntrinsic(AI, 0, DIVariable(DDI->getVariable()), + DIExpression(DDI->getExpression()), + DDI->getDebugLoc(), CI); } DDI->eraseFromParent(); } @@ -1112,10 +1105,8 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, if (!DDI) return false; DebugLoc Loc = DDI->getDebugLoc(); - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; @@ -1127,16 +1118,14 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, SmallVector<uint64_t, 4> NewDIExpr; NewDIExpr.push_back(dwarf::DW_OP_deref); if (DIExpr) - for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i) - NewDIExpr.push_back(DIExpr.getElement(i)); + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpr = Builder.createExpression(NewDIExpr); } // Insert llvm.dbg.declare in the same basic block as the original alloca, // and remove old llvm.dbg.declare. BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB) - ->setDebugLoc(Loc); + Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB); DDI->eraseFromParent(); return true; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6b3aa02..1dbce47 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// Similarly, TripMultiple divides the number of times that the LatchBlock may /// execute without exiting the loop. /// +/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that +/// have a runtime (i.e. not compile-time constant) trip count. Unrolling these +/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count" +/// iterations before branching into the unrolled loop. UnrollLoop will not +/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and +/// AllowExpensiveTripCount is false. +/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// If a LoopPassManager is passed in, and the loop is fully removed, it will be @@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI, - Pass *PP, LPPassManager *LPM, AssumptionCache *AC) { + bool AllowRuntime, bool AllowExpensiveTripCount, + unsigned TripMultiple, LoopInfo *LI, Pass *PP, + LPPassManager *LPM, AssumptionCache *AC) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // flag is specified.
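// Worked example (counts assumed, for illustration): with Count = 4 and a runtime trip count n, the prologue emitted below runs n % 4 iterations, after which the 4x-unrolled loop body executes n / 4 times.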
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); - if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + if (RuntimeTripCount && + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) return false; // Notify ScalarEvolution that the loop will be substantially changed, diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 381d8fc..d1774df 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// ... /// End: /// -bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, +bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, + bool AllowExpensiveTripCount, LoopInfo *LI, LPPassManager *LPM) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) @@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; + BasicBlock *Header = L->getHeader(); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-unroll"); + if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + return false; + // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. if (!isPowerOf2_32(Count)) return false; // This constraint lets us deal with an overflowing trip count easily; see the - // comment on ModVal below. This check is equivalent to `Log2(Count) < - // BEWidth`. - if (static_cast<uint64_t>(Count) > (1ULL << BEWidth)) + // comment on ModVal below. + if (Log2_32(Count) > BEWidth) return false; // If this loop is nested, then the loop unroller changes the code in @@ -333,18 +339,15 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, auto *DT = DTWP ? 
&DTWP->getDomTree() : nullptr; BasicBlock *PH = L->getLoopPreheader(); - BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the // prolog code and one for a new loop preheader BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); - const DataLayout &DL = Header->getModule()->getDataLayout(); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) - SCEVExpander Expander(*SE, DL, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index 35c701e..014574d 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -93,3 +94,13 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set, } return GV; } + +Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { + if (isa<Function>(FuncOrBitcast)) + return cast<Function>(FuncOrBitcast); + FuncOrBitcast->dump(); + std::string Err; + raw_string_ostream Stream(Err); + Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast; + report_fatal_error(Err); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 4b34b19..54e1733 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -872,8 +872,10 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, } SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks; - SmallPtrSet<DomTreeNode *, 32> Visited; SmallVector<DomTreeNode *, 32> Worklist; + SmallPtrSet<DomTreeNode *, 32> VisitedPQ; + SmallPtrSet<DomTreeNode *, 32> VisitedWorklist; + while (!PQ.empty()) { DomTreeNodePair RootPair = PQ.top(); PQ.pop(); @@ -887,6 +889,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, Worklist.clear(); Worklist.push_back(Root); + VisitedWorklist.insert(Root); while (!Worklist.empty()) { DomTreeNode *Node = Worklist.pop_back_val(); @@ -905,7 +908,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, if (SuccLevel > RootLevel) continue; - if (!Visited.insert(SuccNode).second) + if (!VisitedPQ.insert(SuccNode).second) continue; BasicBlock *SuccBB = SuccNode->getBlock(); @@ -919,7 +922,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { - if (!Visited.count(*CI)) + if (VisitedWorklist.insert(*CI).second) Worklist.push_back(*CI); } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c7c0ca6..7c239cb 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1502,7 +1502,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (isa<DbgInfoIntrinsic>(I)) continue; - // Only speculatively execution a single instruction 
(not counting the + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculationCost; if (SpeculationCost > 1) @@ -3884,8 +3884,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { "switch.tableidx.zext"); Value *GEPIndices[] = { Builder.getInt32(0), Index }; - Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, - "switch.gep"); + Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, + GEPIndices, "switch.gep"); return Builder.CreateLoad(GEP, "switch.load"); } } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 5867d65..42102e7 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -222,7 +222,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of // the string .. we're concatenating). - Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr"); // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. @@ -303,7 +303,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p) - return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); return nullptr; } @@ -316,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // strchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr"); } Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { @@ -351,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // strrchr(s+n,c) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr"); } Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) { @@ -476,7 +476,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } // See if we can get the length of the input string. @@ -487,7 +487,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Type *PT = FT->getParamType(0); Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = - B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); + B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. @@ -597,7 +597,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { if (I == StringRef::npos) // No match. 
return Constant::getNullValue(CI->getType()); - return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk"); } // strpbrk(s, "a") -> strchr(s, 'a') @@ -828,7 +828,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { return Constant::getNullValue(CI->getType()); // memchr(s+n,c,l) -> gep(s+n+i,c) - return B.CreateGEP(SrcStr, B.getInt64(I), "memchr"); + return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr"); } Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { @@ -1671,7 +1671,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); - Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); B.CreateStore(B.getInt8(0), Ptr); return ConstantInt::get(CI->getType(), 1); @@ -2276,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // __stpcpy_chk(x,x,...) -> x+strlen(x) if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) { Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; + return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -2284,25 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI, // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFortifiedCallFoldable(CI, 2, 1, true)) { - Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); - return Ret; - } else if (!OnlyLowerUnknownSize) { - // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk. - uint64_t Len = GetStringLength(Src); - if (Len == 0) - return nullptr; + if (isFortifiedCallFoldable(CI, 2, 1, true)) + return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6)); - Type *SizeTTy = DL.getIntPtrType(CI->getContext()); - Value *LenV = ConstantInt::get(SizeTTy, Len); - Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); - // If the function was an __stpcpy_chk, and we were able to fold it into - // a __memcpy_chk, we still need to return the correct end pointer. - if (Ret && Func == LibFunc::stpcpy_chk) - return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1)); - return Ret; - } - return nullptr; + if (OnlyLowerUnknownSize) + return nullptr; + + // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) + return nullptr; + + Type *SizeTTy = DL.getIntPtrType(CI->getContext()); + Value *LenV = ConstantInt::get(SizeTTy, Len); + Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI); + // If the function was an __stpcpy_chk, and we were able to fold it into + // a __memcpy_chk, we still need to return the correct end pointer.
+ if (Ret && Func == LibFunc::stpcpy_chk) + return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1)); + return Ret; } Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, @@ -2322,8 +2322,18 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI, } Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) - return nullptr; + // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here. + // Some clang users checked for _chk libcall availability using: + // __has_builtin(__builtin___memcpy_chk) + // When compiling with -fno-builtin, this is always true. + // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we + // end up with fortified libcalls, which isn't acceptable in a freestanding + // environment which only provides their non-fortified counterparts. + // + // Until we change clang and/or teach external users to check for availability + // differently, disregard the "nobuiltin" attribute and TLI::has. + // + // PR23093. LibFunc::Func Func; Function *Callee = CI->getCalledFunction(); @@ -2332,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) { bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C; // First, check that this is a known library function. - if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func)) + if (!TLI->getLibFunc(FuncName, Func)) return nullptr; // We never change the calling convention. diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index b7d0ae4..8986932 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -93,6 +93,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/VectorUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include <algorithm> #include <map> #include <tuple> @@ -503,8 +504,7 @@ static std::string getDebugLocString(const Loop *L) { std::string Result; if (L) { raw_string_ostream OS(Result); - const DebugLoc LoopDbgLoc = L->getStartLoc(); - if (!LoopDbgLoc.isUnknown()) + if (const DebugLoc LoopDbgLoc = L->getStartLoc()) LoopDbgLoc.print(OS); else // Just print the module name. @@ -686,7 +686,7 @@ public: Index = B.CreateNeg(Index); else if (!StepValue->isOne()) Index = B.CreateMul(Index, StepValue); - return B.CreateGEP(StartValue, Index); + return B.CreateGEP(nullptr, StartValue, Index); case IK_NoInduction: return nullptr; @@ -1839,7 +1839,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { for (unsigned Part = 0; Part < UF; ++Part) { // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + Value *PartPtr = + Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { // If we store to reverse consecutive memory locations then we need @@ -1847,8 +1848,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { StoredVal[Part] = reverseVector(StoredVal[Part]); // If the address is consecutive but reversed, then the // wide store needs to start at the last vector element.
- PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF)); Mask[Part] = reverseVector(Mask[Part]); } @@ -1871,13 +1872,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { setDebugLocFromInst(Builder, LI); for (unsigned Part = 0; Part < UF; ++Part) { // Calculate the pointer for the specific unroll-part. - Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF)); + Value *PartPtr = + Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { // If the address is consecutive but reversed, then the // wide load needs to start at the last vector element. - PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); - PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); + PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF)); + PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF)); Mask[Part] = reverseVector(Mask[Part]); } @@ -4007,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (!LAI->canVectorizeMemory()) return false; + if (LAI->hasStoreToLoopInvariantAddress()) { + emitAnalysis( + VectorizationReport() + << "write to a loop invariant address could not be vectorized"); + DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); + return false; + } + if (LAI->getNumRuntimePointerChecks() > VectorizerParams::RuntimeMemoryCheckThreshold) { emitAnalysis(VectorizationReport() @@ -4307,32 +4317,31 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, } } -LoopVectorizationLegality::InductionKind -LoopVectorizationLegality::isInductionVariable(PHINode *Phi, - ConstantInt *&StepValue) { +bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + ConstantInt *&StepValue) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer induction variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) - return IK_NoInduction; + return false; // Check that the PHI is consecutive. const SCEV *PhiScev = SE->getSCEV(Phi); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); if (!AR) { DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); - return IK_NoInduction; + return false; } const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) - return IK_NoInduction; + return false; ConstantInt *CV = C->getValue(); if (PhiTy->isIntegerTy()) { StepValue = CV; - return IK_IntInduction; + return true; } assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); @@ -4340,14 +4349,28 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi, // The pointer stride cannot be determined if the pointer element type is not // sized.
if (!PointerElementType->isSized()) - return IK_NoInduction; + return false; const DataLayout &DL = Phi->getModule()->getDataLayout(); int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType)); int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) - return IK_NoInduction; + return false; StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + return true; +} + +LoopVectorizationLegality::InductionKind +LoopVectorizationLegality::isInductionVariable(PHINode *Phi, + ConstantInt *&StepValue) { + if (!isInductionPHI(Phi, SE, StepValue)) + return IK_NoInduction; + + Type *PhiTy = Phi->getType(); + // Found an Integer induction variable. + if (PhiTy->isIntegerTy()) + return IK_IntInduction; + // Found a Pointer induction variable. return IK_PtrInduction; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8fc4cc1..7267f58 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1183,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { case Instruction::ICmp: case Instruction::FCmp: { // Check that all of the compares have the same predicate. - CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); + CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType(); for (unsigned i = 1, e = VL.size(); i < e; ++i) { CmpInst *Cmp = cast<CmpInst>(VL[i]); @@ -2202,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (Value *V = alreadyVectorized(E->Scalars)) return V; - CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); + CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); Value *V; if (Opcode == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); @@ -3101,9 +3101,7 @@ struct SLPVectorizer : public FunctionPass { // delete instructions. // Scan the blocks in the function in post order. - for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()), - e = po_end(&F.getEntryBlock()); it != e; ++it) { - BasicBlock *BB = *it; + for (auto BB : post_order(&F.getEntryBlock())) { // Vectorize trees that end at stores. if (unsigned count = collectStores(BB, R)) { (void)count;
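
The ModuleUtils hunk adds checkSanitizerInterfaceFunction so sanitizer passes can fail fast when a runtime function was already declared with a conflicting type, in which case getOrInsertFunction hands back a bitcasted constant rather than a Function. A hedged sketch of a caller, assuming the declaration is exported via ModuleUtils.h as the definition suggests; the runtime function name __sanitizer_demo_init is made up for illustration:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

static Function *declareDemoInit(Module &M) {
  // If "__sanitizer_demo_init" already exists with a different prototype,
  // getOrInsertFunction returns a ConstantExpr bitcast rather than a
  // Function; the helper turns that into an immediate fatal error instead
  // of letting a miscast callee surface later. (3.7-era variadic
  // getOrInsertFunction, terminated by nullptr.)
  Constant *C = M.getOrInsertFunction("__sanitizer_demo_init",
                                      Type::getVoidTy(M.getContext()),
                                      nullptr);
  return checkSanitizerInterfaceFunction(C);
}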
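
The PromoteMemoryToRegister hunk is a correctness fix, not a rename: the old code used one Visited set both to deduplicate dominance-frontier candidates (the SuccNode check) and to guard the dominator-subtree walk, so a node that had merely been recorded as a frontier candidate could wrongly block the walk into its children. A self-contained toy of the corrected two-set pattern; the Node type and collect function are invented stand-ins for DomTreeNode and DetermineInsertionPoint:

#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Children; // tree edges: the walk must reach all of these
  std::vector<Node *> Succs;    // cross edges: each target is reported once
};

static void collect(Node *Root, std::vector<Node *> &Out) {
  std::set<Node *> VisitedSucc; // dedups reported cross-edge targets
  std::set<Node *> VisitedWalk; // dedups the tree walk itself
  std::vector<Node *> Worklist;
  Worklist.push_back(Root);
  VisitedWalk.insert(Root);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    for (Node *S : N->Succs)
      if (VisitedSucc.insert(S).second)
        Out.push_back(S);
    for (Node *C : N->Children)
      // Testing VisitedSucc here instead (as the old single-set code
      // effectively did) would skip the subtree under any child that had
      // already been reported as a cross-edge target.
      if (VisitedWalk.insert(C).second)
        Worklist.push_back(C);
  }
}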
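
The change that recurs through the SimplifyLibCalls and LoopVectorize hunks is mechanical: IRBuilder's CreateGEP/CreateInBoundsGEP now take the GEP source element type as an explicit leading argument (or nullptr to keep deriving it from the pointer operand), groundwork for opaque pointer types. A compilable sketch of the new form against 3.7-era headers; the function name byte_offset and the module name gep-demo are invented:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("gep-demo", Ctx);

  // i8* @byte_offset(i8* %p, i64 %n) -- returns %p advanced by %n bytes.
  Type *I8Ptr = Type::getInt8PtrTy(Ctx);
  FunctionType *FT =
      FunctionType::get(I8Ptr, {I8Ptr, Type::getInt64Ty(Ctx)}, false);
  Function *F =
      Function::Create(FT, Function::ExternalLinkage, "byte_offset", &M);

  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  Function::arg_iterator AI = F->arg_begin();
  Value *P = &*AI++;
  Value *N = &*AI;

  // The element type (i8 here) is stated explicitly; the builder no longer
  // has to look through P's pointer type to size the offset.
  Value *End = B.CreateGEP(B.getInt8Ty(), P, N, "endptr");
  B.CreateRet(End);

  M.print(outs(), nullptr);
  return 0;
}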
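
The LoopVectorize hunks also hoist induction recognition out of LoopVectorizationLegality into a free function, bool llvm::isInductionPHI(PHINode*, ScalarEvolution*, ConstantInt*&), which the newly added LoopUtils.h include suggests is where it is declared for reuse by other passes. A hypothetical caller showing the contract; the wrapper itself is not part of the patch:

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

// Returns the constant per-iteration step of Phi, or nullptr if Phi is not
// an induction recognized by SCEV. For pointer inductions the code above
// divides the byte stride by the element size, so the step counts elements,
// not bytes.
static ConstantInt *inductionStep(PHINode *Phi, ScalarEvolution *SE) {
  ConstantInt *Step = nullptr;
  if (isInductionPHI(Phi, SE, Step))
    return Step;
  return nullptr;
}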
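
The two SLPVectorizer hunks replacing dyn_cast<CmpInst> with cast<CmpInst> cure a latent null dereference rather than style: dyn_cast returns nullptr when the dynamic type test fails, and the old code dereferenced the result unconditionally. A small sketch of when each is appropriate; both helper names are invented:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// The caller promises V is a compare instruction. cast<> documents that
// invariant and asserts it in +Asserts builds; dyn_cast<> would instead
// return nullptr on a mismatch and crash at ->getPredicate().
static CmpInst::Predicate predicateOf(Value *V) {
  return cast<CmpInst>(V)->getPredicate();
}

// When the type really is in question, test the result before using it.
static bool isEqualityCompare(Value *V) {
  if (CmpInst *Cmp = dyn_cast<CmpInst>(V))
    return Cmp->isEquality();
  return false;
}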
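
Finally, the SLPVectorizer traversal switches from an explicit po_iterator begin/end pair to the post_order() range adaptor from PostOrderIterator.h. A sketch of the idiom; forEachBlockPostOrder and visit are illustrative names:

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

using namespace llvm;

static void forEachBlockPostOrder(Function &F, void (*visit)(BasicBlock *)) {
  // post_order() packages po_begin/po_end into an iterator_range; blocks
  // are produced children-first, so each block is seen after its CFG
  // successors (back edges aside).
  for (BasicBlock *BB : post_order(&F.getEntryBlock()))
    visit(BB);
}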