Diffstat (limited to 'lib/Transforms'): 57 files changed, 2492 insertions, 1262 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 46480bd..56975ea 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -207,6 +207,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {    // Make sure that it is local to this module.    if (!F || !F->hasLocalLinkage()) return nullptr; +  // Don't promote arguments for variadic functions. Adding, removing, or +  // changing non-pack parameters can change the classification of pack +  // parameters. Frontends encode that classification at the call site in the +  // IR, while in the callee the classification is determined dynamically based +  // on the number of registers consumed so far. +  if (F->isVarArg()) return nullptr; +    // First check: see if there are any pointer arguments!  If not, quick exit.    SmallVector<Argument*, 16> PointerArgs;    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) @@ -227,12 +234,6 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {        isSelfRecursive = true;    } -  // Don't promote arguments for variadic functions. Adding, removing, or -  // changing non-pack parameters can change the classification of pack -  // parameters. Frontends encode that classification at the call site in the -  // IR, while in the callee the classification is determined dynamically based -  // on the number of registers consumed so far. -  if (F->isVarArg()) return nullptr;    const DataLayout &DL = F->getParent()->getDataLayout();    // Check to see which arguments are promotable.  If an argument is promotable, @@ -674,8 +675,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,        for (ScalarizeTable::iterator SI = ArgIndices.begin(),               E = ArgIndices.end(); SI != E; ++SI) {          // not allowed to dereference ->begin() if size() is 0 -        Params.push_back( -            GetElementPtrInst::getIndexedType(I->getType(), SI->second)); +        Params.push_back(GetElementPtrInst::getIndexedType( +            cast<PointerType>(I->getType()->getScalarType())->getElementType(), +            SI->second));          assert(Params.back());        } @@ -704,7 +706,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,    auto DI = FunctionDIs.find(F);    if (DI != FunctionDIs.end()) {      DISubprogram SP = DI->second; -    SP.replaceFunction(NF); +    SP->replaceFunction(NF);      // Ensure the map is updated so it can be reused on subsequent argument      // promotions of the same function.      FunctionDIs.erase(DI); @@ -860,7 +862,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,      // Update the callgraph to know that the callsite has been transformed.      CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; -    CalleeNode->replaceCallEdge(Call, New, NF_CGN); +    CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);      if (!Call->use_empty()) {        Call->replaceAllUsesWith(New); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4431311..3be23d5 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -73,8 +73,8 @@ namespace {        }        std::string getDescription() const { -        return std::string((IsArg ? "Argument #" : "Return value #")) -               + utostr(Idx) + " of function " + F->getName().str(); +        return (Twine(IsArg ? 
"Argument #" : "Return value #") + utostr(Idx) + +                " of function " + F->getName()).str();        }      }; @@ -304,7 +304,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {    auto DI = FunctionDIs.find(&Fn);    if (DI != FunctionDIs.end()) {      DISubprogram SP = DI->second; -    SP.replaceFunction(NF); +    SP->replaceFunction(NF);      // Ensure the map is updated so it can be reused on non-varargs argument      // eliminations of the same function.      FunctionDIs.erase(DI); @@ -482,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U,        return Result;      } -    if (ImmutableCallSite CS = V) { +    if (auto CS = ImmutableCallSite(V)) {        const Function *F = CS.getCalledFunction();        if (F) {          // Used in a direct call. @@ -1092,7 +1092,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {    // Patch the pointer to LLVM function in debug info descriptor.    auto DI = FunctionDIs.find(F);    if (DI != FunctionDIs.end()) -    DI->second.replaceFunction(NF); +    DI->second->replaceFunction(NF);    // Now that the old function is dead, delete it.    F->eraseFromParent(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 20b41fb..b8c4f5d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -564,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {      if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.      Value *NewPtr = NewGlobals[Val]; +    Type *NewTy = NewGlobals[Val]->getType();      // Form a shorter GEP if needed.      if (GEP->getNumOperands() > 3) { @@ -572,7 +573,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {          Idxs.push_back(NullInt);          for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)            Idxs.push_back(CE->getOperand(i)); -        NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); +        NewPtr = +            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); +        NewTy = GetElementPtrInst::getIndexedType(NewTy, Idxs);        } else {          GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);          SmallVector<Value*, 8> Idxs; @@ -721,8 +724,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {          else            break;        if (Idxs.size() == GEPI->getNumOperands()-1) -        Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, -                          ConstantExpr::getGetElementPtr(NewV, Idxs)); +        Changed |= OptimizeAwayTrappingUsesOfValue( +            GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));        if (GEPI->use_empty()) {          Changed = true;          GEPI->eraseFromParent(); @@ -2338,7 +2341,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,                Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);                Constant * const IdxList[] = {IdxZero, IdxZero}; -              Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); +              Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);                if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))                  Ptr = ConstantFoldConstantExpression(CE, DL, TLI); @@ -2402,8 +2405,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,             i != e; ++i)          GEPOps.push_back(getVal(*i));        InstResult = -        ConstantExpr::getGetElementPtr(P, GEPOps, -                                       
cast<GEPOperator>(GEP)->isInBounds()); +          ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, +                                         cast<GEPOperator>(GEP)->isInBounds());        DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult              << "\n");      } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index fe00d92..f3f8529 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -349,7 +349,8 @@ void LowerBitSets::allocateByteArrays() {      Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),                          ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; -    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); +    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( +        ByteArrayConst->getType(), ByteArray, Idxs);      // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures      // that the pc-relative displacement is folded into the lea instead of the @@ -395,16 +396,17 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,      }      Constant *ByteArray = BAI->ByteArray; +    Type *Ty = BAI->ByteArray->getValueType();      if (!LinkerSubsectionsViaSymbols && AvoidReuse) {        // Each use of the byte array uses a different alias. This makes the        // backend less likely to reuse previously computed byte array addresses,        // improving the security of the CFI mechanism based on this pass. -      ByteArray = GlobalAlias::create( -          BAI->ByteArray->getType()->getElementType(), 0, -          GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); +      ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, +                                      GlobalValue::PrivateLinkage, "bits_use", +                                      ByteArray, M);      } -    Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); +    Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);      Value *Byte = B.CreateLoad(ByteAddr);      Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); @@ -546,8 +548,8 @@ void LowerBitSets::buildBitSetsFromGlobals(      // Multiply by 2 to account for padding elements.      
Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),                                        ConstantInt::get(Int32Ty, I * 2)}; -    Constant *CombinedGlobalElemPtr = -        ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); +    Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( +        NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);      if (LinkerSubsectionsViaSymbols) {        Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);      } else { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d28d563..502451b 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -59,6 +59,10 @@ static cl::opt<bool>  RunLoopRerolling("reroll-loops", cl::Hidden,                   cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), +             cl::desc("Run the float2int (float demotion) pass")); +  static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),                                      cl::Hidden,                                      cl::desc("Run the load combining pass")); @@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager(    // we must insert a no-op module pass to reset the pass manager.    MPM.add(createBarrierNoopPass()); +  if (RunFloat2Int) +    MPM.add(createFloat2IntPass()); +    // Re-rotate loops in all our loop nests. These may have fallout out of    // rotated form due to GVN or other transformations, and the vectorizer relies    // on the rotated form. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 816978e..ad7c5a0 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -305,33 +305,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {    SmallVector<Metadata *, 64> LiveSubprograms;    DenseSet<const MDNode *> VisitedSet; -  for (DICompileUnit DIC : F.compile_units()) { -    assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); - +  for (MDCompileUnit *DIC : F.compile_units()) {      // Create our live subprogram list. -    DIArray SPs = DIC.getSubprograms(); +    MDSubprogramArray SPs = DIC->getSubprograms();      bool SubprogramChange = false; -    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { -      DISubprogram DISP(SPs.getElement(i)); -      assert(DISP.Verify() && "DISP must verify as a DISubprogram."); +    for (unsigned i = 0, e = SPs.size(); i != e; ++i) { +      DISubprogram DISP = SPs[i];        // Make sure we visit each subprogram only once.        if (!VisitedSet.insert(DISP).second)          continue;        // If the function referenced by DISP is not null, the function is live. -      if (DISP.getFunction()) +      if (DISP->getFunction())          LiveSubprograms.push_back(DISP);        else          SubprogramChange = true;      }      // Create our live global variable list. -    DIArray GVs = DIC.getGlobalVariables(); +    MDGlobalVariableArray GVs = DIC->getGlobalVariables();      bool GlobalVariableChange = false; -    for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { -      DIGlobalVariable DIG(GVs.getElement(i)); -      assert(DIG.Verify() && "DIG must verify as DIGlobalVariable."); +    for (unsigned i = 0, e = GVs.size(); i != e; ++i) { +      DIGlobalVariable DIG = GVs[i];        // Make sure we only visit each global variable only once.        
if (!VisitedSet.insert(DIG).second) @@ -339,7 +335,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {        // If the global variable referenced by DIG is not null, the global        // variable is live. -      if (DIG.getGlobal()) +      if (DIG->getVariable())          LiveGlobalVariables.push_back(DIG);        else          GlobalVariableChange = true; @@ -349,12 +345,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {      // subprogram list/global variable list with our new live subprogram/global      // variable list.      if (SubprogramChange) { -      DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms))); +      DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));        Changed = true;      }      if (GlobalVariableChange) { -      DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables))); +      DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));        Changed = true;      } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 21243c2..56b6cd3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {    return nullptr;  } +static Value *SimplifyX86insertps(const IntrinsicInst &II, +                                  InstCombiner::BuilderTy &Builder) { +  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { +    VectorType *VecTy = cast<VectorType>(II.getType()); +    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); +     +    // The immediate permute control byte looks like this: +    //    [3:0] - zero mask for each 32-bit lane +    //    [5:4] - select one 32-bit destination lane +    //    [7:6] - select one 32-bit source lane + +    uint8_t Imm = CInt->getZExtValue(); +    uint8_t ZMask = Imm & 0xf; +    uint8_t DestLane = (Imm >> 4) & 0x3; +    uint8_t SourceLane = (Imm >> 6) & 0x3; + +    // If all zero mask bits are set, this was just a weird way to +    // generate a zero vector. +    if (ZMask == 0xf) +      return ZeroVector; +     +    // TODO: Model this case as two shuffles or a 'logical and' plus shuffle? +    if (ZMask) +      return nullptr; + +    assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); + +    // If we're not zeroing anything, this is a single shuffle. +    // Replace the selected destination lane with the selected source lane. +    // For all other lanes, pass the first source bits through. +    int ShuffleMask[4] = { 0, 1, 2, 3 }; +    ShuffleMask[DestLane] = SourceLane + 4; +     +    return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1), +                                       ShuffleMask); +  } +  return nullptr; +} +  /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit  /// source vectors, unless a zero bit is set. If a zero bit is set,  /// then ignore that half of the mask and clear that half of the vector.  
static Value *SimplifyX86vperm2(const IntrinsicInst &II,                                  InstCombiner::BuilderTy &Builder) { -  if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { +  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {      VectorType *VecTy = cast<VectorType>(II.getType());      ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); @@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      }      break; -  case Intrinsic::uadd_with_overflow: { -    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -    OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); -    if (OR == OverflowResult::NeverOverflows) -      return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); -    if (OR == OverflowResult::AlwaysOverflows) -      return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); -  } -  // FALL THROUGH uadd into sadd + +  case Intrinsic::uadd_with_overflow:    case Intrinsic::sadd_with_overflow: -    // Canonicalize constants into the RHS. +  case Intrinsic::umul_with_overflow: +  case Intrinsic::smul_with_overflow:      if (isa<Constant>(II->getArgOperand(0)) &&          !isa<Constant>(II->getArgOperand(1))) { +      // Canonicalize constants into the RHS.        Value *LHS = II->getArgOperand(0);        II->setArgOperand(0, II->getArgOperand(1));        II->setArgOperand(1, LHS);        return II;      } +    // fall through -    // X + undef -> undef -    if (isa<UndefValue>(II->getArgOperand(1))) -      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - -    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { -      // X + 0 -> {X, false} -      if (RHS->isZero()) { -        return CreateOverflowTuple(II, II->getArgOperand(0), false, -                                    /*ReUseName*/false); -      } -    } - -    // We can strength reduce reduce this signed add into a regular add if we -    // can prove that it will never overflow. 
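Earlier in this InstCombineCalls diff, the new SimplifyX86insertps turns a constant-immediate insertps into a plain shufflevector. A standalone sketch of the imm8 decoding it performs, in plain C++ with illustrative names (decodeInsertps is not an LLVM function):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Decode an SSE4.1 insertps immediate into a shuffle mask over two
    // <4 x float> operands: lanes 0-3 select from A, lanes 4-7 from B.
    // Returns false for the partially-zeroing case the pass leaves alone.
    static bool decodeInsertps(uint8_t Imm, std::array<int, 4> &Mask) {
      uint8_t ZMask = Imm & 0xf;         // [3:0] per-lane zero mask
      uint8_t DestLane = (Imm >> 4) & 3; // [5:4] destination lane in A
      uint8_t SrcLane = (Imm >> 6) & 3;  // [7:6] source lane in B
      if (ZMask)
        return false; // ZMask == 0xf is just a zero vector; others need more
      Mask = {0, 1, 2, 3};          // identity: pass A through...
      Mask[DestLane] = SrcLane + 4; // ...except one lane taken from B
      return true;
    }

    int main() {
      std::array<int, 4> M;
      if (decodeInsertps(0x60, M)) // B lane 1 into A lane 2, no zeroing
        std::printf("mask = <%d, %d, %d, %d>\n", M[0], M[1], M[2], M[3]);
    }

Running it prints mask = <0, 1, 5, 3>, the same ShuffleMask the transform above would feed to CreateShuffleVector for that immediate.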
-    if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { -      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -      if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { -        return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); -      } -    } - -    break;    case Intrinsic::usub_with_overflow:    case Intrinsic::ssub_with_overflow: { -    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -    // undef - X -> undef -    // X - undef -> undef -    if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) -      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - -    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { -      // X - 0 -> {X, false} -      if (ConstRHS->isZero()) { -        return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); -      } -    } -    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { -      if (WillNotOverflowSignedSub(LHS, RHS, *II)) { -        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); -      } -    } else { -      if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { -        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); -      } -    } -    break; -  } -  case Intrinsic::umul_with_overflow: { -    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -    OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); -    if (OR == OverflowResult::NeverOverflows) -      return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); -    if (OR == OverflowResult::AlwaysOverflows) -      return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); -  } // FALL THROUGH -  case Intrinsic::smul_with_overflow: -    // Canonicalize constants into the RHS. -    if (isa<Constant>(II->getArgOperand(0)) && -        !isa<Constant>(II->getArgOperand(1))) { -      Value *LHS = II->getArgOperand(0); -      II->setArgOperand(0, II->getArgOperand(1)); -      II->setArgOperand(1, LHS); -      return II; -    } - -    // X * undef -> undef -    if (isa<UndefValue>(II->getArgOperand(1))) -      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); +    OverflowCheckFlavor OCF = +        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID()); +    assert(OCF != OCF_INVALID && "unexpected!"); -    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { -      // X*0 -> {0, false} -      if (RHSI->isZero()) -        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); +    Value *OperationResult = nullptr; +    Constant *OverflowResult = nullptr; +    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1), +                              *II, OperationResult, OverflowResult)) +      return CreateOverflowTuple(II, OperationResult, OverflowResult); -      // X * 1 -> {X, false} -      if (RHSI->equalsInt(1)) { -        return CreateOverflowTuple(II, II->getArgOperand(0), false, -                                    /*ReUseName*/false); -      } -    } -    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { -      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -      if (WillNotOverflowSignedMul(LHS, RHS, *II)) { -        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); -      } -    }      break; +  } +    case Intrinsic::minnum:    case Intrinsic::maxnum: {      Value *Arg0 = II->getArgOperand(0); @@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      }      break;    } - +  case 
Intrinsic::x86_sse41_insertps: +    if (Value *V = SimplifyX86insertps(*II, *Builder)) +      return ReplaceInstUsesWith(*II, V); +    break; +        case Intrinsic::x86_sse4a_insertqi: {      // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top      // ones undef diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index fe544c2..bd79a26 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1450,42 +1450,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {        CI.setOperand(0, GEP->getOperand(0));        return &CI;      } - -    // If the GEP has a single use, and the base pointer is a bitcast, and the -    // GEP computes a constant offset, see if we can convert these three -    // instructions into fewer.  This typically happens with unions and other -    // non-type-safe code. -    unsigned AS = GEP->getPointerAddressSpace(); -    unsigned OffsetBits = DL.getPointerSizeInBits(AS); -    APInt Offset(OffsetBits, 0); -    BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); -    if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { -      // FIXME: This is insufficiently tested - just a no-crash test -      // (test/Transforms/InstCombine/2007-05-14-Crash.ll) -      // -      // Get the base pointer input of the bitcast, and the type it points to. -      Value *OrigBase = BCI->getOperand(0); -      SmallVector<Value*, 8> NewIndices; -      if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), -                              NewIndices)) { -        // FIXME: This codepath is completely untested - could be unreachable -        // for all I know. -        // If we were able to index down into an element, create the GEP -        // and bitcast the result.  This eliminates one bitcast, potentially -        // two. -        Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() -                          ? Builder->CreateInBoundsGEP(OrigBase, NewIndices) -                          : Builder->CreateGEP( -                                OrigBase->getType()->getPointerElementType(), -                                OrigBase, NewIndices); -        NGEP->takeName(GEP); - -        if (isa<BitCastInst>(CI)) -          return new BitCastInst(NGEP, CI.getType()); -        assert(isa<PtrToIntInst>(CI)); -        return new PtrToIntInst(NGEP, CI.getType()); -      } -    }    }    return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 803b50a..223bba0 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2109,33 +2109,112 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,    return ExtractValueInst::Create(Call, 1, "sadd.overflow");  } -static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, -                                     InstCombiner &IC) { -  // Don't bother doing this transformation for pointers, don't do it for -  // vectors. 
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; +bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, +                                         Value *RHS, Instruction &OrigI, +                                         Value *&Result, Constant *&Overflow) { +  assert((!OrigI.isCommutative() || +          !(isa<Constant>(LHS) && !isa<Constant>(RHS))) && +         "call with a constant RHS if possible!"); + +  auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) { +    Result = OpResult; +    Overflow = OverflowVal; +    if (ReuseName) +      Result->takeName(&OrigI); +    return true; +  }; -  // If the add is a constant expr, then we don't bother transforming it. -  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); -  if (!OrigAdd) return nullptr; +  switch (OCF) { +  case OCF_INVALID: +    llvm_unreachable("bad overflow check kind!"); -  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); +  case OCF_UNSIGNED_ADD: { +    OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); +    if (OR == OverflowResult::NeverOverflows) +      return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), +                       true); -  // Put the new code above the original add, in case there are any uses of the -  // add between the add and the compare. -  InstCombiner::BuilderTy *Builder = IC.Builder; -  Builder->SetInsertPoint(OrigAdd); +    if (OR == OverflowResult::AlwaysOverflows) +      return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); +  } +  // FALL THROUGH uadd into sadd +  case OCF_SIGNED_ADD: { +    // X + undef -> undef +    if (isa<UndefValue>(RHS)) +      return SetResult(UndefValue::get(RHS->getType()), +                       UndefValue::get(Builder->getInt1Ty()), false); + +    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) +      // X + 0 -> {X, false} +      if (ConstRHS->isZero()) +        return SetResult(LHS, Builder->getFalse(), false); + +    // We can strength reduce this signed add into a regular add if we can prove +    // that it will never overflow. 
+    if (OCF == OCF_SIGNED_ADD) +      if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) +        return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), +                         true); +  } -  Module *M = I.getParent()->getParent()->getParent(); -  Type *Ty = LHS->getType(); -  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); -  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); -  Value *Add = Builder->CreateExtractValue(Call, 0); +  case OCF_UNSIGNED_SUB: +  case OCF_SIGNED_SUB: { +    // undef - X -> undef +    // X - undef -> undef +    if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) +      return SetResult(UndefValue::get(LHS->getType()), +                       UndefValue::get(Builder->getInt1Ty()), false); + +    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) +      // X - 0 -> {X, false} +      if (ConstRHS->isZero()) +        return SetResult(UndefValue::get(LHS->getType()), Builder->getFalse(), +                         false); + +    if (OCF == OCF_SIGNED_SUB) { +      if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) +        return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), +                         true); +    } else { +      if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) +        return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), +                         true); +    } +    break; +  } -  IC.ReplaceInstUsesWith(*OrigAdd, Add); +  case OCF_UNSIGNED_MUL: { +    OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); +    if (OR == OverflowResult::NeverOverflows) +      return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), +                       true); +    if (OR == OverflowResult::AlwaysOverflows) +      return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); +  } // FALL THROUGH +  case OCF_SIGNED_MUL: +    // X * undef -> undef +    if (isa<UndefValue>(RHS)) +      return SetResult(UndefValue::get(LHS->getType()), +                       UndefValue::get(Builder->getInt1Ty()), false); + +    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(RHS)) { +      // X * 0 -> {0, false} +      if (RHSI->isZero()) +        return SetResult(Constant::getNullValue(RHS->getType()), +                         Builder->getFalse(), false); + +      // X * 1 -> {X, false} +      if (RHSI->equalsInt(1)) +        return SetResult(LHS, Builder->getFalse(), false); +    } -  // The original icmp gets replaced with the overflow value. -  return ExtractValueInst::Create(Call, 1, "uadd.overflow"); +    if (OCF == OCF_SIGNED_MUL) +      if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) +        return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), +                         true); +  } + +  return false;  }  /// \brief Recognize and process idiom involving test for multiplication @@ -3432,21 +3511,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {          return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);      } -    // (a+b) <u a  --> llvm.uadd.with.overflow. -    // (a+b) <u b  --> llvm.uadd.with.overflow. -    if (I.getPredicate() == ICmpInst::ICMP_ULT && -        match(Op0, m_Add(m_Value(A), m_Value(B))) && -        (Op1 == A || Op1 == B)) -      if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) -        return R; - -    // a >u (a+b)  --> llvm.uadd.with.overflow. -    // b >u (a+b)  --> llvm.uadd.with.overflow. 
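The surrounding InstCombineCompares hunks retire ProcessUAddIdiom; the icmp pattern it recognized is now matched with m_UAddWithOverflow and funneled through the shared OptimizeOverflowCheck. The idiom itself rests on a basic property of wrapping unsigned arithmetic, illustrated here in plain C++ (standalone, illustrative names):

    #include <cstdint>
    #include <cstdio>

    // For unsigned a and b, the wrapped sum a + b compares below a (or b)
    // exactly when the addition overflowed -- the "(a+b) <u a" pattern
    // that instcombine canonicalizes to llvm.uadd.with.overflow.
    static bool uaddOverflows(uint32_t A, uint32_t B) {
      return A + B < A; // unsigned wraparound is well-defined
    }

    int main() {
      std::printf("%d\n", uaddOverflows(0xffffffffu, 1u)); // 1: wraps to 0
      std::printf("%d\n", uaddOverflows(40u, 2u));         // 0: 42 >= 40
    }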
-    if (I.getPredicate() == ICmpInst::ICMP_UGT && -        match(Op1, m_Add(m_Value(A), m_Value(B))) && -        (Op0 == A || Op0 == B)) -      if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) -        return R; +    Instruction *AddI = nullptr; +    if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B), +                                     m_Instruction(AddI))) && +        isa<IntegerType>(A->getType())) { +      Value *Result; +      Constant *Overflow; +      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result, +                                Overflow)) { +        ReplaceInstUsesWith(*AddI, Result); +        return ReplaceInstUsesWith(I, Overflow); +      } +    }      // (zext a) * (zext b)  --> llvm.umul.with.overflow.      if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index fb2321d..d0caf34 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -110,6 +110,41 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {    return false;  } + +/// \brief Specific patterns of overflow check idioms that we match. +enum OverflowCheckFlavor { +  OCF_UNSIGNED_ADD, +  OCF_SIGNED_ADD, +  OCF_UNSIGNED_SUB, +  OCF_SIGNED_SUB, +  OCF_UNSIGNED_MUL, +  OCF_SIGNED_MUL, + +  OCF_INVALID +}; + +/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op +/// intrinsic. +static inline OverflowCheckFlavor +IntrinsicIDToOverflowCheckFlavor(unsigned ID) { +  switch (ID) { +  default: +    return OCF_INVALID; +  case Intrinsic::uadd_with_overflow: +    return OCF_UNSIGNED_ADD; +  case Intrinsic::sadd_with_overflow: +    return OCF_SIGNED_ADD; +  case Intrinsic::usub_with_overflow: +    return OCF_UNSIGNED_SUB; +  case Intrinsic::ssub_with_overflow: +    return OCF_SIGNED_SUB; +  case Intrinsic::umul_with_overflow: +    return OCF_UNSIGNED_MUL; +  case Intrinsic::smul_with_overflow: +    return OCF_SIGNED_MUL; +  } +} +  /// \brief An IRBuilder inserter that adds new instructions to the instcombine  /// worklist.  class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter @@ -316,7 +351,7 @@ private:    bool ShouldChangeType(Type *From, Type *To) const;    Value *dyn_castNegVal(Value *V) const;    Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; -  Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, +  Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,                              SmallVectorImpl<Value *> &NewIndices);    Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -329,6 +364,17 @@ private:    bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,                            Type *Ty); +  /// \brief Try to optimize a sequence of instructions checking if an operation +  /// on LHS and RHS overflows. +  /// +  /// If a simplification is possible, stores the simplified result of the +  /// operation in OperationResult and result of the overflow check in +  /// OverflowResult, and return true.  If no simplification is possible, +  /// returns false. 
+  bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS, +                             Instruction &CtxI, Value *&OperationResult, +                             Constant *&OverflowResult); +    Instruction *visitCallSite(CallSite CS);    Instruction *tryOptimizeCall(CallInst *CI);    bool transformConstExprCastCall(CallSite CS); @@ -391,14 +437,10 @@ public:    }    /// Creates a result tuple for an overflow intrinsic \p II with a given -  /// \p Result and a constant \p Overflow value. If \p ReUseName is true the -  /// \p Result's name is taken from \p II. +  /// \p Result and a constant \p Overflow value.    Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result, -                                   bool Overflow, bool ReUseName = true) { -    if (ReUseName) -      Result->takeName(II); -    Constant *V[] = {UndefValue::get(Result->getType()), -                     Overflow ? Builder->getTrue() : Builder->getFalse()}; +                                   Constant *Overflow) { +    Constant *V[] = {UndefValue::get(Result->getType()), Overflow};      StructType *ST = cast<StructType>(II->getType());      Constant *Struct = ConstantStruct::get(ST, V);      return InsertValueInst::Create(Struct, Result, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6b0f268..d8a559c 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -84,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,          continue;        } -      if (CallSite CS = I) { +      if (auto CS = CallSite(I)) {          // If this is the function being called then we treat it like a load and          // ignore it.          
if (CS.isCallee(&U)) @@ -611,8 +611,10 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,      return false;    SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); -  Type *AllocTy = -    GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); +  Type *AllocTy = GetElementPtrInst::getIndexedType( +      cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType()) +          ->getElementType(), +      Ops);    if (!AllocTy || !AllocTy->isSized())      return false;    const DataLayout &DL = IC.getDataLayout(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b6beb65..24446c8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -987,8 +987,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {      unsigned BegIdx = Mask.front();      VectorType *SrcTy = cast<VectorType>(V->getType());      unsigned VecBitWidth = SrcTy->getBitWidth(); -    unsigned SrcElemBitWidth = -        SrcTy->getElementType()->getPrimitiveSizeInBits(); +    unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());      assert(SrcElemBitWidth && "vector elements must have a bitwidth");      unsigned SrcNumElems = SrcTy->getNumElements();      SmallVector<BitCastInst *, 8> BCs; @@ -1000,7 +999,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {            BCs.push_back(BC);      for (BitCastInst *BC : BCs) {        Type *TgtTy = BC->getDestTy(); -      unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits(); +      unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);        if (!TgtElemBitWidth)          continue;        unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 90551e4..3b46156 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -869,11 +869,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {  /// whether or not there is a sequence of GEP indices into the pointed type that  /// will land us at the specified offset.  If so, fill them into NewIndices and  /// return the resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,                                          SmallVectorImpl<Value *> &NewIndices) { -  assert(PtrTy->isPtrOrPtrVectorTy()); - -  Type *Ty = PtrTy->getPointerElementType(); +  Type *Ty = PtrTy->getElementType();    if (!Ty->isSized())      return nullptr; @@ -1611,12 +1609,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              // %0 = GEP [10 x i8] addrspace(1)* X, ...              // addrspacecast i8 addrspace(1)* %0 to i8*              SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); -            Value *NewGEP = -                GEP.isInBounds() -                    ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, -                                                 GEP.getName()) -                    : Builder->CreateGEP(StrippedPtrTy->getElementType(), -                                         StrippedPtr, Idx, GEP.getName()); +            Value *NewGEP = GEP.isInBounds() +                                ? 
Builder->CreateInBoundsGEP( +                                      nullptr, StrippedPtr, Idx, GEP.getName()) +                                : Builder->CreateGEP(nullptr, StrippedPtr, Idx, +                                                     GEP.getName());              return new AddrSpaceCastInst(NewGEP, GEP.getType());            }          } @@ -1634,9 +1631,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {          Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };          Value *NewGEP =              GEP.isInBounds() -                ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) -                : Builder->CreateGEP(StrippedPtrTy->getElementType(), -                                     StrippedPtr, Idx, GEP.getName()); +                ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, +                                             GEP.getName()) +                : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());          // V and GEP are both pointer types --> BitCast          return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1669,10 +1666,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              // GEP may not be "inbounds".              Value *NewGEP =                  GEP.isInBounds() && NSW -                    ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, +                    ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,                                                   GEP.getName()) -                    : Builder->CreateGEP(StrippedPtrTy->getElementType(), -                                         StrippedPtr, NewIdx, GEP.getName()); +                    : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, +                                         GEP.getName());              // The NewGEP must be pointer typed, so must the old one -> BitCast              return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1710,9 +1707,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {                  Constant::getNullValue(DL.getIntPtrType(GEP.getType())),                  NewIdx}; -            Value *NewGEP = GEP.isInBounds() && NSW ? -              Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : -              Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); +            Value *NewGEP = GEP.isInBounds() && NSW +                                ? Builder->CreateInBoundsGEP( +                                      SrcElTy, StrippedPtr, Off, GEP.getName()) +                                : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, +                                                     GEP.getName());              // The NewGEP must be pointer typed, so must the old one -> BitCast              return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,                                                                   GEP.getType()); @@ -1774,9 +1773,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // GEP.        SmallVector<Value*, 8> NewIndices;        if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { -        Value *NGEP = GEP.isInBounds() ? -          Builder->CreateInBoundsGEP(Operand, NewIndices) : -          Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); +        Value *NGEP = +            GEP.isInBounds() +                ? 
Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) +                : Builder->CreateGEP(nullptr, Operand, NewIndices);          if (NGEP->getType() == GEP.getType())            return ReplaceInstUsesWith(GEP, NGEP); @@ -2268,7 +2268,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {        // We need to insert these at the location of the old load, not at that of        // the extractvalue.        Builder->SetInsertPoint(L->getParent(), L); -      Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); +      Value *GEP = Builder->CreateInBoundsGEP(L->getType(), +                                              L->getPointerOperand(), Indices);        // Returning the load directly will cause the main loop to insert it in        // the wrong spot, so use ReplaceInstUsesWith().        return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -2725,7 +2726,7 @@ bool InstCombiner::run() {          DEBUG(dbgs() << "IC: Old = " << *I << '\n'                       << "    New = " << *Result << '\n'); -        if (!I->getDebugLoc().isUnknown()) +        if (I->getDebugLoc())            Result->setDebugLoc(I->getDebugLoc());          // Everything uses the new instruction now.          I->replaceAllUsesWith(Result); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 978c857..8d6d3ce 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -401,12 +401,12 @@ struct AddressSanitizer : public FunctionPass {      return SizeInBytes;    }    /// Check if we want (and can) handle this alloca. -  bool isInterestingAlloca(AllocaInst &AI) const; +  bool isInterestingAlloca(AllocaInst &AI);    /// If it is an interesting memory access, return the PointerOperand    /// and set IsWrite/Alignment. Otherwise return nullptr.    Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,                                     uint64_t *TypeSize, -                                   unsigned *Alignment) const; +                                   unsigned *Alignment);    void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,                       bool UseCalls, const DataLayout &DL);    void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -458,6 +458,7 @@ struct AddressSanitizer : public FunctionPass {    Function *AsanMemmove, *AsanMemcpy, *AsanMemset;    InlineAsm *EmptyAsm;    GlobalsMetadata GlobalsMD; +  DenseMap<AllocaInst *, bool> ProcessedAllocas;    friend struct FunctionStackPoisoner;  }; @@ -804,13 +805,21 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {  }  /// Check if we want (and can) handle this alloca. -bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { -  return (AI.getAllocatedType()->isSized() && -          // alloca() may be called with 0 size, ignore it. -          getAllocaSizeInBytes(&AI) > 0 && -          // We are only interested in allocas not promotable to registers. -          // Promotable allocas are common under -O0. 
-          (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { +  auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); + +  if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) +    return PreviouslySeenAllocaInfo->getSecond(); + +  bool IsInteresting = (AI.getAllocatedType()->isSized() && +    // alloca() may be called with 0 size, ignore it. +    getAllocaSizeInBytes(&AI) > 0 && +    // We are only interested in allocas not promotable to registers. +    // Promotable allocas are common under -O0. +    (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); + +  ProcessedAllocas[&AI] = IsInteresting; +  return IsInteresting;  }  /// If I is an interesting memory access, return the PointerOperand @@ -818,7 +827,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const {  Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,                                                     bool *IsWrite,                                                     uint64_t *TypeSize, -                                                   unsigned *Alignment) const { +                                                   unsigned *Alignment) {    // Skip memory accesses inserted by another instrumentation.    if (I->getMetadata("nosanitize")) return nullptr; @@ -959,18 +968,6 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,                                     UseCalls, Exp);  } -// Validate the result of Module::getOrInsertFunction called for an interface -// function of AddressSanitizer. If the instrumented module defines a function -// with the same name, their prototypes must match, otherwise -// getOrInsertFunction returns a bitcast. -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { -  if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); -  FuncOrBitcast->dump(); -  report_fatal_error( -      "trying to redefine an AddressSanitizer " -      "interface function"); -} -  Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,                                                   Value *Addr, bool IsWrite,                                                   size_t AccessSizeIndex, @@ -1056,7 +1053,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,      // path is rarely taken. This seems to be the case for SPEC benchmarks.      TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(          Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); -    assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); +    assert(cast<BranchInst>(CheckTerm)->isUnconditional());      BasicBlock *NextBB = CheckTerm->getSuccessor(0);      IRB.SetInsertPoint(CheckTerm);      Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); @@ -1219,17 +1216,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {  void AddressSanitizerModule::initializeCallbacks(Module &M) {    IRBuilder<> IRB(*C);    // Declare our poisoning and unpoisoning functions. 
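The isInterestingAlloca hunk above drops const so the pass can memoize its verdict per alloca in the new ProcessedAllocas DenseMap. The lookup-or-compute shape, reduced to standard C++ (std::unordered_map standing in for llvm::DenseMap; the Alloca struct is a stand-in, not ASan's types):

    #include <unordered_map>

    struct Alloca { bool Sized; unsigned SizeInBytes; bool Promotable; };

    class AllocaChecker {
      std::unordered_map<const Alloca *, bool> Processed; // per-run cache
    public:
      bool isInteresting(const Alloca &AI) {
        auto It = Processed.find(&AI);
        if (It != Processed.end())
          return It->second; // classified before: reuse the answer
        bool Interesting = AI.Sized && AI.SizeInBytes > 0 && !AI.Promotable;
        Processed[&AI] = Interesting; // compute once per alloca
        return Interesting;
      }
    };

The caching is what motivates the const removal on the two member functions: the query now mutates pass state.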
-  AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( +  AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));    AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); -  AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( +  AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));    AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);    // Declare functions that register/unregister globals. -  AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( +  AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));    AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); -  AsanUnregisterGlobals = checkInterfaceFunction( +  AsanUnregisterGlobals = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),                              IntptrTy, IntptrTy, nullptr));    AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); @@ -1317,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {      Indices2[1] = IRB.getInt32(0);      G->replaceAllUsesWith( -        ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true)); +        ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));      NewGlobal->takeName(G);      G->eraseFromParent(); @@ -1399,44 +1396,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) {        const std::string ExpStr = Exp ? "exp_" : "";        const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;        AsanErrorCallbackSized[AccessIsWrite][Exp] = -          checkInterfaceFunction(M.getOrInsertFunction( +          checkSanitizerInterfaceFunction(M.getOrInsertFunction(                kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",                IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));        AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = -          checkInterfaceFunction(M.getOrInsertFunction( +          checkSanitizerInterfaceFunction(M.getOrInsertFunction(                ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",                IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));        for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;             AccessSizeIndex++) {          const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);          AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = -            checkInterfaceFunction(M.getOrInsertFunction( +            checkSanitizerInterfaceFunction(M.getOrInsertFunction(                  kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),                  IntptrTy, ExpType, nullptr));          AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = -            checkInterfaceFunction(M.getOrInsertFunction( +            checkSanitizerInterfaceFunction(M.getOrInsertFunction(                  ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),                  IntptrTy, ExpType, nullptr));        }      }    } -  AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( +  AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); -  
AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction( +  AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); -  AsanMemset = checkInterfaceFunction(M.getOrInsertFunction( +  AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),        IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); -  AsanHandleNoReturnFunc = checkInterfaceFunction( +  AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); -  AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( +  AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); -  AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction( +  AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));    // We insert an empty inline asm after __asan_report* to avoid callback merge.    EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -1461,7 +1458,7 @@ bool AddressSanitizer::doInitialization(Module &M) {    BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);    // call __asan_init in the module ctor.    IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); -  AsanInitFunction = checkInterfaceFunction( +  AsanInitFunction = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr));    AsanInitFunction->setLinkage(Function::ExternalLinkage);    IRB.CreateCall(AsanInitFunction); @@ -1613,16 +1610,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {    IRBuilder<> IRB(*C);    for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) {      std::string Suffix = itostr(i); -    AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction( -        kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr)); -    AsanStackFreeFunc[i] = checkInterfaceFunction( +    AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( +        M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, +                              IntptrTy, nullptr)); +    AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(          M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,                                IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));    } -  AsanPoisonStackMemoryFunc = checkInterfaceFunction( +  AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),                              IntptrTy, IntptrTy, nullptr)); -  AsanUnpoisonStackMemoryFunc = checkInterfaceFunction( +  AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),                              IntptrTy, IntptrTy, nullptr));  } @@ -1757,9 +1755,11 @@ void FunctionStackPoisoner::poisonStack() {    uint64_t LocalStackSize = L.FrameSize;    bool DoStackMalloc =        ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; -  // Don't do dynamic alloca in presence of inline asm: too often it -  // makes assumptions on which registers are available. 
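The renames through this region swap the file-local checkInterfaceFunction (deleted above) for a shared checkSanitizerInterfaceFunction. The guard both perform, restated from the deleted helper (same logic, abbreviated message):

    #include "llvm/IR/Function.h"
    #include "llvm/Support/ErrorHandling.h"

    using namespace llvm;

    // Module::getOrInsertFunction returns the existing global when the
    // name is already taken; if that global's prototype differs from the
    // one requested, the result is a bitcast constant expression rather
    // than a Function. Sanitizer runtime entry points must match exactly.
    static Function *checkInterface(Constant *FuncOrBitcast) {
      if (auto *F = dyn_cast<Function>(FuncOrBitcast))
        return F;
      report_fatal_error("trying to redefine a sanitizer interface function");
    }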
+  // Don't do dynamic alloca in presence of inline asm: too often it makes +  // assumptions on which registers are available. Don't do stack malloc in the +  // presence of inline asm on 32-bit platforms for the same reason.    bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; +  DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32;    Value *StaticAlloca =        DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b3925ee..06d5aed 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -753,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {        // Patch the pointer to LLVM function in debug info descriptor.        auto DI = FunctionDIs.find(&F);        if (DI != FunctionDIs.end()) -        DI->second.replaceFunction(&F); +        DI->second->replaceFunction(&F);        UnwrappedFnMap[WrappedFnCst] = &F;        *i = NewF; @@ -1075,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,    }    case 2: {      IRBuilder<> IRB(Pos); -    Value *ShadowAddr1 = -        IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); +    Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr, +                                       ConstantInt::get(DFS.IntptrTy, 1));      return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign),                            IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos);    } @@ -1127,7 +1127,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,        BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);        DT.addNewBlock(NextBB, LastBr->getParent());        IRBuilder<> NextIRB(NextBB); -      WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); +      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, +                                   ConstantInt::get(DFS.IntptrTy, 1));        Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign);        ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);        LastBr->setSuccessor(0, NextBB); @@ -1213,7 +1214,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,      Value *ShadowVecAddr =          IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));      do { -      Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); +      Value *CurShadowVecAddr = +          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);        IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);        Size -= ShadowVecSize;        ++Offset; @@ -1221,7 +1223,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,      Offset *= ShadowVecSize;    }    while (Size > 0) { -    Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); +    Value *CurShadowAddr = +        IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);      IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);      --Size;      ++Offset; @@ -1469,17 +1472,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) {            Args.push_back(DFSF.getShadow(*i));          if (FT->isVarArg()) { -          auto LabelVAAlloca = -              new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy, -                                            CS.arg_size() - 
FT->getNumParams()), -                             "labelva", DFSF.F->getEntryBlock().begin()); +          auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, +                                           CS.arg_size() - FT->getNumParams()); +          auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", +                                               DFSF.F->getEntryBlock().begin());            for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { -            auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n); +            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);              IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);            } -          Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0)); +          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));          }          if (!FT->getReturnType()->isVoidTy()) { @@ -1565,10 +1568,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) {        ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);        AllocaInst *VarArgShadow =            new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); -      Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); +      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));        for (unsigned n = 0; i != e; ++i, ++n) { -        IRB.CreateStore(DFSF.getShadow(*i), -                        IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); +        IRB.CreateStore( +            DFSF.getShadow(*i), +            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));          Args.push_back(*i);        }      } diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a793e69..368a81d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -149,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) {    return new GCOVProfiler(Options);  } -static StringRef getFunctionName(DISubprogram SP) { -  if (!SP.getLinkageName().empty()) -    return SP.getLinkageName(); -  return SP.getName(); +static StringRef getFunctionName(MDSubprogram *SP) { +  if (!SP->getLinkageName().empty()) +    return SP->getLinkageName(); +  return SP->getName();  }  namespace { @@ -163,7 +163,7 @@ namespace {      static const char *const BlockTag;      static const char *const EdgeTag; -    GCOVRecord() {} +    GCOVRecord() = default;      void writeBytes(const char *Bytes, int Size) {        os->write(Bytes, Size); @@ -315,7 +315,7 @@ namespace {             ReturnBlock(1, os) {        this->os = os; -      Function *F = SP.getFunction(); +      Function *F = SP->getFunction();        DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n");        uint32_t i = 0; @@ -330,7 +330,7 @@ namespace {        std::string FunctionNameAndLine;        raw_string_ostream FNLOS(FunctionNameAndLine); -      FNLOS << getFunctionName(SP) << SP.getLineNumber(); +      FNLOS << getFunctionName(SP) << SP->getLine();        FNLOS.flush();        FuncChecksum = hash_value(FunctionNameAndLine);      } @@ -366,7 +366,7 @@ namespace {      void writeOut() {        writeBytes(FunctionTag, 4);        uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + -          1 + lengthOfGCOVString(SP.getFilename()) + 1; +                          1 + lengthOfGCOVString(SP->getFilename()) + 1;        if (UseCfgChecksum)          ++BlockLen;        write(BlockLen); @@ -375,8 +375,8 
@@ namespace {        if (UseCfgChecksum)          write(CfgChecksum);        writeGCOVString(getFunctionName(SP)); -      writeGCOVString(SP.getFilename()); -      write(SP.getLineNumber()); +      writeGCOVString(SP->getFilename()); +      write(SP->getLine());        // Emit count of blocks.        writeBytes(BlockTag, 4); @@ -437,12 +437,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {      }    } -  SmallString<128> Filename = CU.getFilename(); +  SmallString<128> Filename = CU->getFilename();    sys::path::replace_extension(Filename, NewStem);    StringRef FName = sys::path::filename(Filename);    SmallString<128> CurPath;    if (sys::fs::current_path(CurPath)) return FName; -  sys::path::append(CurPath, FName.str()); +  sys::path::append(CurPath, FName);    return CurPath.str();  } @@ -466,7 +466,8 @@ static bool functionHasLines(Function *F) {        if (isa<DbgInfoIntrinsic>(I)) continue;        const DebugLoc &Loc = I->getDebugLoc(); -      if (Loc.isUnknown()) continue; +      if (!Loc) +        continue;        // Artificial lines such as calls to the global constructors.        if (Loc.getLine() == 0) continue; @@ -486,21 +487,14 @@ void GCOVProfiler::emitProfileNotes() {      // this pass over the original .o's as they're produced, or run it after      // LTO, we'll generate the same .gcno files. -    DICompileUnit CU(CU_Nodes->getOperand(i)); +    DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));      std::error_code EC;      raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);      std::string EdgeDestinations; -    DIArray SPs = CU.getSubprograms();      unsigned FunctionIdent = 0; -    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { -      DISubprogram SP(SPs.getElement(i)); -      assert((!SP || SP.isSubprogram()) && -        "A MDNode in subprograms of a CU should be null or a DISubprogram."); -      if (!SP) -        continue; - -      Function *F = SP.getFunction(); +    for (auto *SP : CU->getSubprograms()) { +      Function *F = SP->getFunction();        if (!F) continue;        if (!functionHasLines(F)) continue; @@ -536,16 +530,18 @@ void GCOVProfiler::emitProfileNotes() {            if (isa<DbgInfoIntrinsic>(I)) continue;            const DebugLoc &Loc = I->getDebugLoc(); -          if (Loc.isUnknown()) continue; +          if (!Loc) +            continue;            // Artificial lines such as calls to the global constructors.            
if (Loc.getLine() == 0) continue;            if (Line == Loc.getLine()) continue;            Line = Loc.getLine(); -          if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; +          if (SP != getDISubprogram(Loc.getScope())) +            continue; -          GCOVLines &Lines = Block.getFile(SP.getFilename()); +          GCOVLines &Lines = Block.getFile(SP->getFilename());            Lines.addLine(Loc.getLine());          }        } @@ -574,16 +570,10 @@ bool GCOVProfiler::emitProfileArcs() {    bool Result = false;    bool InsertIndCounterIncrCode = false;    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { -    DICompileUnit CU(CU_Nodes->getOperand(i)); -    DIArray SPs = CU.getSubprograms(); +    DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));      SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; -    for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { -      DISubprogram SP(SPs.getElement(i)); -      assert((!SP || SP.isSubprogram()) && -        "A MDNode in subprograms of a CU should be null or a DISubprogram."); -      if (!SP) -        continue; -      Function *F = SP.getFunction(); +    for (auto *SP : CU->getSubprograms()) { +      Function *F = SP->getFunction();        if (!F) continue;        if (!functionHasLines(F)) continue;        if (!Result) Result = true; @@ -603,7 +593,7 @@ bool GCOVProfiler::emitProfileArcs() {                             GlobalValue::InternalLinkage,                             Constant::getNullValue(CounterTy),                             "__llvm_gcov_ctr"); -      CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); +      CountersBySP.push_back(std::make_pair(Counters, SP));        UniqueVector<BasicBlock *> ComplexEdgePreds;        UniqueVector<BasicBlock *> ComplexEdgeSuccs; @@ -628,7 +618,8 @@ bool GCOVProfiler::emitProfileArcs() {              SmallVector<Value *, 2> Idx;              Idx.push_back(Builder.getInt64(0));              Idx.push_back(Sel); -            Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); +            Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(), +                                                       Counters, Idx);              Value *Count = Builder.CreateLoad(Counter);              Count = Builder.CreateAdd(Count, Builder.getInt64(1));              Builder.CreateStore(Count, Counter); @@ -855,7 +846,7 @@ Function *GCOVProfiler::insertCounterWriteout(    NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");    if (CU_Nodes) {      for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { -      DICompileUnit CU(CU_Nodes->getOperand(i)); +      DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i));        std::string FilenameGcda = mangleName(CU, "gcda");        uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];        Builder.CreateCall3(StartFile, @@ -863,7 +854,7 @@ Function *GCOVProfiler::insertCounterWriteout(                            Builder.CreateGlobalStringPtr(ReversedVersion),                            Builder.getInt32(CfgChecksum));        for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { -        DISubprogram SP(CountersBySP[j].second); +        auto *SP = cast_or_null<MDSubprogram>(CountersBySP[j].second);          uint32_t FuncChecksum = Funcs.empty() ? 
0 : Funcs[j]->getFuncChecksum();          Builder.CreateCall5(              EmitFunction, Builder.getInt32(j), @@ -922,7 +913,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() {    Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty());    Arg = std::next(Fn->arg_begin());    Arg->setName("counters"); -  Value *GEP = Builder.CreateGEP(Arg, ZExtPred); +  Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred);    Value *Counter = Builder.CreateLoad(GEP, "counter");    Cond = Builder.CreateICmpEQ(Counter,                                Constant::getNullValue( diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c2aa1e2..2b35066 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -623,8 +623,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {        Value *IntptrOriginPtr =            IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));        for (unsigned i = 0; i < Size / IntptrSize; ++i) { -        Value *Ptr = -            i ? IRB.CreateConstGEP1_32(IntptrOriginPtr, i) : IntptrOriginPtr; +        Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) +                       : IntptrOriginPtr;          IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);          Ofs += IntptrSize / kOriginSize;          CurrentAlignment = IntptrAlignment; @@ -632,7 +632,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {      }      for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { -      Value *GEP = i ? IRB.CreateConstGEP1_32(OriginPtr, i) : OriginPtr; +      Value *GEP = +          i ? 
IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr;        IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);        CurrentAlignment = kMinOriginAlignment;      } @@ -2843,7 +2844,8 @@ struct VarArgAMD64Helper : public VarArgHelper {        Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);        Value *OverflowArgAreaShadowPtr =          MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); -      Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset); +      Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, +                                             AMD64FpEndOffset);        IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);      }    } diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 289675e..662513d 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -140,16 +140,6 @@ class SanitizerCoverageModule : public ModulePass {  }  // namespace -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { -  if (Function *F = dyn_cast<Function>(FuncOrBitcast)) -     return F; -  std::string Err; -  raw_string_ostream Stream(Err); -  Stream << "SanitizerCoverage interface function redefined: " -         << *FuncOrBitcast; -  report_fatal_error(Err); -} -  bool SanitizerCoverageModule::runOnModule(Module &M) {    if (!CoverageLevel) return false;    C = &(M.getContext()); @@ -167,16 +157,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {    ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc));    appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority); -  SanCovFunction = checkInterfaceFunction( +  SanCovFunction = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr)); -  SanCovWithCheckFunction = checkInterfaceFunction( +  SanCovWithCheckFunction = checkSanitizerInterfaceFunction(        M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); -  SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( -      kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); -  SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( -      kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); - -  SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( +  SanCovIndirCallFunction = +      checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); +  SanCovTraceCmpFunction = +      checkSanitizerInterfaceFunction(M.getOrInsertFunction( +          kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + +  SanCovModuleInit = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy,        Int8PtrTy, Int8PtrTy, nullptr));    SanCovModuleInit->setLinkage(Function::ExternalLinkage); @@ -186,9 +178,9 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {                              /*hasSideEffects=*/true);    if (ClExperimentalTracing) { -    SanCovTraceEnter = checkInterfaceFunction( +    SanCovTraceEnter = checkSanitizerInterfaceFunction(          M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); -    SanCovTraceBB = checkInterfaceFunction( +    SanCovTraceBB = checkSanitizerInterfaceFunction(          M.getOrInsertFunction(kSanCovTraceBB, VoidTy, 
Int32PtrTy, nullptr));    } @@ -316,7 +308,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(      IRBuilder<> IRB(I);      CallSite CS(I);      Value *Callee = CS.getCalledValue(); -    if (dyn_cast<InlineAsm>(Callee)) continue; +    if (isa<InlineAsm>(Callee)) continue;      GlobalVariable *CalleeCache = new GlobalVariable(          *F.getParent(), Ty, false, GlobalValue::PrivateLinkage,          Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); @@ -366,8 +358,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,    }    bool IsEntryBB = &BB == &F.getEntryBlock(); -  DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() -                          ? IP->getDebugLoc().getFnDebugLoc(*C) +  DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc() +                          ? IP->getDebugLoc().getFnDebugLoc()                            : IP->getDebugLoc();    IRBuilder<> IRB(IP);    IRB.SetCurrentDebugLocation(EntryLoc); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c3ba722..aa8ee5a 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -129,54 +129,48 @@ FunctionPass *llvm::createThreadSanitizerPass() {    return new ThreadSanitizer();  } -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { -  if (Function *F = dyn_cast<Function>(FuncOrBitcast)) -     return F; -  FuncOrBitcast->dump(); -  report_fatal_error("ThreadSanitizer interface function redefined"); -} -  void ThreadSanitizer::initializeCallbacks(Module &M) {    IRBuilder<> IRB(M.getContext());    // Initialize the callbacks. -  TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( +  TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); -  TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( -      "__tsan_func_exit", IRB.getVoidTy(), nullptr)); +  TsanFuncExit = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));    OrdTy = IRB.getInt32Ty();    for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {      const size_t ByteSize = 1 << i;      const size_t BitSize = ByteSize * 8;      SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); -    TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( +    TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(          ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));      SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); -    TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( +    TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(          WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));      SmallString<64> UnalignedReadName("__tsan_unaligned_read" +          itostr(ByteSize)); -    TsanUnalignedRead[i] = checkInterfaceFunction(M.getOrInsertFunction( -        UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); +    TsanUnalignedRead[i] = +        checkSanitizerInterfaceFunction(M.getOrInsertFunction( +            UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));      SmallString<64> UnalignedWriteName("__tsan_unaligned_write" +          itostr(ByteSize)); -    TsanUnalignedWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( -        UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), 
nullptr)); +    TsanUnalignedWrite[i] = +        checkSanitizerInterfaceFunction(M.getOrInsertFunction( +            UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));      Type *Ty = Type::getIntNTy(M.getContext(), BitSize);      Type *PtrTy = Ty->getPointerTo();      SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +                                     "_load"); -    TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( -        AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); +    TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( +        M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));      SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +                                      "_store"); -    TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( -        AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, -        nullptr)); +    TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( +        AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));      for (int op = AtomicRMWInst::FIRST_BINOP;          op <= AtomicRMWInst::LAST_BINOP; ++op) { @@ -199,34 +193,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {        else          continue;        SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); -      TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( -          RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); +      TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( +          M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));      }      SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) +                                    "_compare_exchange_val"); -    TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( +    TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(          AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));    } -  TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( -      "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), -      IRB.getInt8PtrTy(), nullptr)); -  TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( +  TsanVptrUpdate = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), +                            IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); +  TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); -  TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( +  TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr)); -  TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( +  TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(        "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr)); -  MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( -    "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), -    IRB.getInt8PtrTy(), IntptrTy, nullptr)); -  MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( -    "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), -    IntptrTy, nullptr)); -  MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( -    "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), -    IntptrTy, nullptr)); +  MemmoveFn = 
checkSanitizerInterfaceFunction( +      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), +                            IRB.getInt8PtrTy(), IntptrTy, nullptr)); +  MemcpyFn = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), +                            IRB.getInt8PtrTy(), IntptrTy, nullptr)); +  MemsetFn = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), +                            IRB.getInt32Ty(), IntptrTy, nullptr));  }  bool ThreadSanitizer::doInitialization(Module &M) { diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 87de33b..d4fef10 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -54,8 +54,6 @@ public:                              RetainAutorelease(nullptr),                              RetainAutoreleaseRV(nullptr) { } -  ~ARCRuntimeEntryPoints() { } -    void init(Module *M) {      TheModule = M;      AutoreleaseRV = nullptr; diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index b197c97..4edd029 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -45,7 +45,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,    default: break;    } -  ImmutableCallSite CS = static_cast<const Value *>(Inst); +  ImmutableCallSite CS(Inst);    assert(CS && "Only calls can alter reference counts!");    // See if AliasAnalysis can help us with the call. @@ -99,7 +99,7 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,      // of any other dynamic reference-counted pointers.      if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA()))        return false; -  } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { +  } else if (auto CS = ImmutableCallSite(Inst)) {      // For calls, just check the arguments (and not the callee operand).      
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),           OE = CS.arg_end(); OI != OE; ++OI) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5aa2b97..8918909 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass {      initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());    } -  bool runOnFunction(Function &F); +  bool runOnFunction(Function &F) override; -  virtual void getAnalysisUsage(AnalysisUsage &AU) const { +  void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<AssumptionCacheTracker>();      AU.addRequired<ScalarEvolution>();      AU.addRequired<DominatorTreeWrapperPass>(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index cf30f39..16f2ead 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \    DeadStoreElimination.cpp \    EarlyCSE.cpp \    FlattenCFGPass.cpp \ +  Float2Int.cpp \    GVN.cpp \    IndVarSimplify.cpp \    InductiveRangeCheckElimination.cpp \ @@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \    LowerExpectIntrinsic.cpp \    MemCpyOptimizer.cpp \    MergedLoadStoreMotion.cpp \ +  NaryReassociate.cpp \    PartiallyInlineLibCalls.cpp \    PlaceSafepoints.cpp \    Reassociate.cpp \ diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d12fdb7..c29fcc3 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts    DeadStoreElimination.cpp    EarlyCSE.cpp    FlattenCFGPass.cpp +  Float2Int.cpp    GVN.cpp    InductiveRangeCheckElimination.cpp    IndVarSimplify.cpp @@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts    LowerExpectIntrinsic.cpp    MemCpyOptimizer.cpp    MergedLoadStoreMotion.cpp +  NaryReassociate.cpp    PartiallyInlineLibCalls.cpp    PlaceSafepoints.cpp    Reassociate.cpp diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb8981b..01952cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {        return true;      }    } -  if (CallSite CS = I) { +  if (auto CS = CallSite(I)) {      if (Function *F = CS.getCalledFunction()) {        if (TLI && TLI->has(LibFunc::strcpy) &&            F->getName() == TLI->getName(LibFunc::strcpy)) { @@ -262,7 +262,7 @@ static bool isRemovable(Instruction *I) {      }    } -  if (CallSite CS = I) +  if (auto CS = CallSite(I))      return CS.getInstruction()->use_empty();    return false; @@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) {      }    } -  CallSite CS = I; +  CallSite CS(I);    // All the supported functions so far happen to have dest as their first    // argument.    return CS.getArgument(0); @@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {        continue;      } -    if (CallSite CS = cast<Value>(BBI)) { +    if (auto CS = CallSite(BBI)) {        // Remove allocation function calls from the list of dead stack objects;         // there can't be any references before the definition.        
if (isAllocLikeFn(BBI, TLI))
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to the FP domain (uitofp, sitofp), we terminate our walk.
+
+/// The largest integer type worth dealing with.
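+/// Ranges are tracked at MaxIntegerBW+1 bits so that even the full unsigned
+/// range of a 64-bit operand still fits with a sign bit to spare; anything
+/// that would need a wider range is treated as poison (see badRange() and
+/// validateRange() below).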
+static cl::opt<unsigned>
+MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
+             cl::desc("Max integer bitwidth to consider in float2int "
+                      "(default=64)"));
+
+namespace {
+  struct Float2Int : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    Float2Int() : FunctionPass(ID) {
+      initializeFloat2IntPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F) override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+    }
+
+    void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots);
+    ConstantRange seen(Instruction *I, ConstantRange R);
+    ConstantRange badRange();
+    ConstantRange unknownRange();
+    ConstantRange validateRange(ConstantRange R);
+    void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots);
+    void walkForwards();
+    bool validateAndTransform();
+    Value *convert(Instruction *I, Type *ToTy);
+    void cleanup();
+
+    MapVector<Instruction*, ConstantRange> SeenInsts;
+    SmallPtrSet<Instruction*,8> Roots;
+    EquivalenceClasses<Instruction*> ECs;
+    MapVector<Instruction*, Value*> ConvertedInsts;
+    LLVMContext *Ctx;
+  };
+}
+
+char Float2Int::ID = 0;
+INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false)
+
+// Given an FCmp predicate, return a matching ICmp predicate if one
+// exists, otherwise return BAD_ICMP_PREDICATE.
+static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
+  switch (P) {
+  case CmpInst::FCMP_OEQ:
+  case CmpInst::FCMP_UEQ:
+    return CmpInst::ICMP_EQ;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT:
+    return CmpInst::ICMP_SGT;
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE:
+    return CmpInst::ICMP_SGE;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT:
+    return CmpInst::ICMP_SLT;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE:
+    return CmpInst::ICMP_SLE;
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_UNE:
+    return CmpInst::ICMP_NE;
+  default:
+    return CmpInst::BAD_ICMP_PREDICATE;
+  }
+}
+
+// Given a floating point binary operator, return the matching
+// integer version.
+static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Unhandled opcode!");
+  case Instruction::FAdd: return Instruction::Add;
+  case Instruction::FSub: return Instruction::Sub;
+  case Instruction::FMul: return Instruction::Mul;
+  }
+}
+
+// Find the roots - instructions that convert from the FP domain to
+// the integer domain.
+void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) {
+  for (auto &I : inst_range(F)) {
+    switch (I.getOpcode()) {
+    default: break;
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+      Roots.insert(&I);
+      break;
+    case Instruction::FCmp:
+      if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
+          CmpInst::BAD_ICMP_PREDICATE)
+        Roots.insert(&I);
+      break;
+    }
+  }
+}
+
+// Helper - mark I as having been traversed, having range R.
+ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) {
+  DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
+  if (SeenInsts.find(I) != SeenInsts.end())
+    SeenInsts.find(I)->second = R;
+  else
+    SeenInsts.insert(std::make_pair(I, R));
+  return R;
+}
+
+// Helper - get a range representing a poison value.
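+// badRange() is the full set and unknownRange() the empty set, both at
+// MaxIntegerBW+1 bits; unioning anything with badRange() stays bad, while
+// unioning with unknownRange() is a no-op.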
+ConstantRange Float2Int::badRange() {
+  return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+  return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+  if (R.getBitWidth() > MaxIntegerBW + 1)
+    return badRange();
+  return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+//   - walkBackwards:  A breadth-first walk of the use-def graph starting from
+//                     the roots. Populate "SeenInsts" with interesting
+//                     instructions and poison values if they're obvious and
+//                     cheap to compute. Calculate the equivalence set structure
+//                     while we're here too.
+//   - walkForwards:  Iterate over SeenInsts in reverse order, so we visit
+//                     defs before their uses. Calculate the real range info.
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+  std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+
+    if (SeenInsts.find(I) != SeenInsts.end())
+      // Seen already.
+      continue;
+
+    switch (I->getOpcode()) {
+      // FIXME: Handle select and phi nodes.
+    default:
+      // Path terminated uncleanly.
+      seen(I, badRange());
+      break;
+
+    case Instruction::UIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+      APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(Min, Max)));
+      continue;
+    }
+
+    case Instruction::SIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+      APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(SMin, SMax)));
+      continue;
+    }
+
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FCmp:
+      seen(I, unknownRange());
+      break;
+    }
+
+    for (Value *O : I->operands()) {
+      if (Instruction *OI = dyn_cast<Instruction>(O)) {
+        // Unify def-use chains if they interfere.
+        ECs.unionSets(I, OI);
+        if (SeenInsts.find(I)->second != badRange())
+          Worklist.push_back(OI);
+      } else if (!isa<ConstantFP>(O)) {
+        // Not an instruction or ConstantFP? We can't do anything.
+        seen(I, badRange());
+      }
+    }
+  }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
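+//
+// For example, given
+//
+//   %f = sitofp i32 %a to double
+//   %g = fadd double %f, %f
+//   %r = fptosi double %g to i32
+//
+// walkBackwards seeds %f with the signed i32 range and marks %g and %r as
+// unknown; walkForwards then processes %g and %r in def-before-use order,
+// computing %g's range by adding %f's range to itself.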
+void Float2Int::walkForwards() {
+  for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+    if (It->second != unknownRange())
+      continue;
+
+    Instruction *I = It->first;
+    std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+    switch (I->getOpcode()) {
+      // FIXME: Handle select and phi nodes.
+    default:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+      llvm_unreachable("Should have been handled in walkBackwards!");
+
+    case Instruction::FAdd:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 2 && "FAdd is a binary operator!");
+        return Ops[0].add(Ops[1]);
+      };
+      break;
+
+    case Instruction::FSub:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 2 && "FSub is a binary operator!");
+        return Ops[0].sub(Ops[1]);
+      };
+      break;
+
+    case Instruction::FMul:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 2 && "FMul is a binary operator!");
+        return Ops[0].multiply(Ops[1]);
+      };
+      break;
+
+    //
+    // Root-only instructions - we'll only see these if they're the
+    //                          first node in a walk.
+    //
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
+        return Ops[0];
+      };
+      break;
+
+    case Instruction::FCmp:
+      Op = [](ArrayRef<ConstantRange> Ops) {
+        assert(Ops.size() == 2 && "FCmp is a binary operator!");
+        return Ops[0].unionWith(Ops[1]);
+      };
+      break;
+    }
+
+    bool Abort = false;
+    SmallVector<ConstantRange,4> OpRanges;
+    for (Value *O : I->operands()) {
+      if (Instruction *OI = dyn_cast<Instruction>(O)) {
+        assert(SeenInsts.find(OI) != SeenInsts.end() &&
+               "def not seen before use!");
+        OpRanges.push_back(SeenInsts.find(OI)->second);
+      } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
+        // Work out if the floating point number can be losslessly represented
+        // as an integer.
+        // APFloat::convertToInteger(&Exact) purports to do what we want, but
+        // the exactness can be too precise. For example, negative zero can
+        // never be exactly converted to an integer.
+        //
+        // Instead, we ask APFloat to round itself to an integral value - this
+        // preserves sign-of-zero - then compare the result with the original.
+        //
+        APFloat F = CF->getValueAPF();
+
+        // First, weed out obviously incorrect values. Non-finite numbers
+        // can't be represented and neither can negative zero, unless
+        // we're in fast math mode.
+        if (!F.isFinite() ||
+            (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
+             !I->hasNoSignedZeros())) {
+          seen(I, badRange());
+          Abort = true;
+          break;
+        }
+
+        APFloat NewF = F;
+        auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
+        if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) {
+          seen(I, badRange());
+          Abort = true;
+          break;
+        }
+        // OK, it's representable. Now get it.
+        APSInt Int(MaxIntegerBW+1, false); +        bool Exact; +        CF->getValueAPF().convertToInteger(Int, +                                           APFloat::rmNearestTiesToEven, +                                           &Exact); +        OpRanges.push_back(ConstantRange(Int)); +      } else { +        llvm_unreachable("Should have already marked this as badRange!"); +      } +    } + +    // Reduce the operands' ranges to a single range and return. +    if (!Abort) +      seen(I, Op(OpRanges));     +  } +} + +// If there is a valid transform to be done, do it. +bool Float2Int::validateAndTransform() { +  bool MadeChange = false; + +  // Iterate over every disjoint partition of the def-use graph. +  for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) { +    ConstantRange R(MaxIntegerBW + 1, false); +    bool Fail = false; +    Type *ConvertedToTy = nullptr; + +    // For every member of the partition, union all the ranges together. +    for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); +         MI != ME; ++MI) { +      Instruction *I = *MI; +      auto SeenI = SeenInsts.find(I); +      if (SeenI == SeenInsts.end()) +        continue; + +      R = R.unionWith(SeenI->second); +      // We need to ensure I has no users that have not been seen. +      // If it does, transformation would be illegal. +      // +      // Don't count the roots, as they terminate the graphs. +      if (Roots.count(I) == 0) { +        // Set the type of the conversion while we're here. +        if (!ConvertedToTy) +          ConvertedToTy = I->getType(); +        for (User *U : I->users()) { +          Instruction *UI = dyn_cast<Instruction>(U); +          if (!UI || SeenInsts.find(UI) == SeenInsts.end()) { +            DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n"); +            Fail = true; +            break; +          } +        } +      } +      if (Fail) +        break; +    } + +    // If the set was empty, or we failed, or the range is poisonous, +    // bail out. +    if (ECs.member_begin(It) == ECs.member_end() || Fail || +        R.isFullSet() || R.isSignWrappedSet()) +      continue; +    assert(ConvertedToTy && "Must have set the convertedtoty by this point!"); +     +    // The number of bits required is the maximum of the upper and +    // lower limits, plus one so it can be signed. +    unsigned MinBW = std::max(R.getLower().getMinSignedBits(), +                              R.getUpper().getMinSignedBits()) + 1; +    DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n"); + +    // If we've run off the realms of the exactly representable integers, +    // the floating point result will differ from an integer approximation. + +    // Do we need more bits than are in the mantissa of the type we converted +    // to? semanticsPrecision returns the number of mantissa bits plus one +    // for the sign bit. +    unsigned MaxRepresentableBits +      = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; +    if (MinBW > MaxRepresentableBits) { +      DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n"); +      continue; +    } +    if (MinBW > 64) { +      DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n"); +      continue; +    } + +    // OK, R is known to be representable. Now pick a type for it. +    // FIXME: Pick the smallest legal type that will fit. +    Type *Ty = (MinBW > 32) ? 
Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx); + +    for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); +         MI != ME; ++MI) +      convert(*MI, Ty); +    MadeChange = true; +  } + +  return MadeChange; +} + +Value *Float2Int::convert(Instruction *I, Type *ToTy) { +  if (ConvertedInsts.find(I) != ConvertedInsts.end()) +    // Already converted this instruction. +    return ConvertedInsts[I]; + +  SmallVector<Value*,4> NewOperands; +  for (Value *V : I->operands()) { +    // Don't recurse if we're an instruction that terminates the path. +    if (I->getOpcode() == Instruction::UIToFP || +        I->getOpcode() == Instruction::SIToFP) { +      NewOperands.push_back(V); +    } else if (Instruction *VI = dyn_cast<Instruction>(V)) { +      NewOperands.push_back(convert(VI, ToTy)); +    } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { +      APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false); +      bool Exact; +      CF->getValueAPF().convertToInteger(Val, +                                         APFloat::rmNearestTiesToEven, +                                         &Exact); +      NewOperands.push_back(ConstantInt::get(ToTy, Val)); +    } else { +      llvm_unreachable("Unhandled operand type?"); +    } +  } + +  // Now create a new instruction. +  IRBuilder<> IRB(I); +  Value *NewV = nullptr; +  switch (I->getOpcode()) { +  default: llvm_unreachable("Unhandled instruction!"); + +  case Instruction::FPToUI: +    NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType()); +    break; + +  case Instruction::FPToSI: +    NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType()); +    break; + +  case Instruction::FCmp: { +    CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate()); +    assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!"); +    NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName()); +    break; +  } + +  case Instruction::UIToFP: +    NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy); +    break; + +  case Instruction::SIToFP: +    NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy); +    break; + +  case Instruction::FAdd: +  case Instruction::FSub: +  case Instruction::FMul: +    NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()), +                           NewOperands[0], NewOperands[1], +                           I->getName()); +    break; +  } + +  // If we're a root instruction, RAUW. +  if (Roots.count(I)) +    I->replaceAllUsesWith(NewV); + +  ConvertedInsts[I] = NewV; +  return NewV; +} + +// Perform dead code elimination on the instructions we just modified. +void Float2Int::cleanup() { +  for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); +       I != E; ++I) +    I->first->eraseFromParent(); +} + +bool Float2Int::runOnFunction(Function &F) { +  if (skipOptnoneFunction(F)) +    return false; + +  DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); +  // Clear out all state. 
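+  // (The same pass object is reused for every function in the module, so
+  // state left over from a previous run would pollute this one.)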
+  ECs = EquivalenceClasses<Instruction*>(); +  SeenInsts.clear(); +  ConvertedInsts.clear(); +  Roots.clear(); + +  Ctx = &F.getParent()->getContext(); + +  findRoots(F, Roots); + +  walkBackwards(Roots); +  walkForwards(); + +  bool Modified = validateAndTransform(); +  if (Modified) +    cleanup(); +  return Modified; +} + +FunctionPass *llvm::createFloat2IntPass() { +  return new Float2Int(); +} + diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c73e60f..97d5b6d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,                                   Type::getInt8PtrTy(Src->getContext(), AS));    Constant *OffsetCst =      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); -  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); +  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, +                                       OffsetCst);    Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));    if (ConstantFoldLoadFromConstPtr(Src, DL))      return Offset; @@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,                                   Type::getInt8PtrTy(Src->getContext(), AS));    Constant *OffsetCst =      ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); -  Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); +  Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, +                                       OffsetCst);    Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));    return ConstantFoldLoadFromConstPtr(Src, DL);  } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 51e8041..ab8e5b8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,  //  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.  //===----------------------------------------------------------------------===// -/// Check for expressions that ScalarEvolution generates to compute -/// BackedgeTakenInfo. If these expressions have not been reduced, then -/// expanding them may incur additional cost (albeit in the loop preheader). -static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, -                                SmallPtrSetImpl<const SCEV*> &Processed, -                                ScalarEvolution *SE) { -  if (!Processed.insert(S).second) -    return false; - -  // If the backedge-taken count is a UDiv, it's very likely a UDiv that -  // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a -  // precise expression, rather than a UDiv from the user's code. If we can't -  // find a UDiv in the code with some simple searching, assume the former and -  // forego rewriting the loop. 
-  if (isa<SCEVUDivExpr>(S)) { -    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); -    if (!OrigCond) return true; -    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); -    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); -    if (R != S) { -      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); -      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); -      if (L != S) -        return true; -    } -  } - -  // Recurse past add expressions, which commonly occur in the -  // BackedgeTakenCount. They may already exist in program code, and if not, -  // they are not too expensive rematerialize. -  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { -    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); -         I != E; ++I) { -      if (isHighCostExpansion(*I, BI, Processed, SE)) -        return true; -    } -    return false; -  } - -  // HowManyLessThans uses a Max expression whenever the loop is not guarded by -  // the exit condition. -  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) -    return true; - -  // If we haven't recognized an expensive SCEV pattern, assume it's an -  // expression produced by program code. -  return false; -} -  /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken  /// count expression can be safely and cheaply expanded into an instruction  /// sequence that can be used by LinearFunctionTestReplace. @@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,  /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint).  /// However, we don't yet have a strong motivation for converting loop tests  /// into inequality tests. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, +                                        SCEVExpander &Rewriter) {    const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||        BackedgeTakenCount->isZero()) @@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {      return false;    // Can't rewrite non-branch yet. -  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); -  if (!BI) +  if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))      return false; -  SmallPtrSet<const SCEV*, 8> Processed; -  if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) +  if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))      return false;    return true; @@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,             && "unit stride pointer IV must be i8*");      IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); -    return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); +    return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit");    }    else {      // In any other case, convert both IVInit and IVCount to integers before @@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L,                            const SCEV *BackedgeTakenCount,                            PHINode *IndVar,                            SCEVExpander &Rewriter) { -  assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); +  assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");    // Initialize CmpIndVar and IVCount to their preincremented values.    
Value *CmpIndVar = IndVar; @@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {    // If we have a trip count expression, rewrite the loop's exit condition    // using it.  We can currently only handle loops with a single exit. -  if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { +  if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) {      PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);      if (IndVar) {        // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 1f33f72..c19cd19 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -41,9 +41,9 @@ struct PointerOffsetPair {  };  struct LoadPOPPair { +  LoadPOPPair() = default;    LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)        : Load(L), POP(P), InsertOrder(O) {} -  LoadPOPPair() {}    LoadInst *Load;    PointerOffsetPair POP;    /// \brief The new load needs to be created before the first load in IR order. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 8445d5f..099f227 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -28,7 +28,7 @@  //  // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however  // it's useful to think about these as the same register, with some uses using -// the value of the register before the add and some using // it after. In this +// the value of the register before the add and some using it after. In this  // example, the icmp is a post-increment user, since it uses %i.next, which is  // the value of the induction variable after the increment. The other common  // case of post-increment users is users outside the loop. @@ -112,8 +112,6 @@ public:    /// a particular register.    SmallBitVector UsedByIndices; -  RegSortData() {} -    void print(raw_ostream &OS) const;    void dump() const;  }; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 600cbde..3de3b3d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -165,6 +165,7 @@ namespace {        UP.MaxCount = UINT_MAX;        UP.Partial = CurrentAllowPartial;        UP.Runtime = CurrentRuntime; +      UP.AllowExpensiveTripCount = false;        TTI.getUnrollingPreferences(L, UP);      } @@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {    }    // Unroll the loop. 
-  if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, -                  &LPM, &AC)) +  if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, +                  TripMultiple, LI, this, &LPM, &AC))      return false;    return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2b5a078..d651473 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {          RepeatInstruction = processMemCpy(M);        else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))          RepeatInstruction = processMemMove(M); -      else if (CallSite CS = (Value*)I) { +      else if (auto CS = CallSite(I)) {          for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)            if (CS.isByValArgument(i))              MadeChange |= processByValArgument(CS, i); diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp new file mode 100644 index 0000000..fea7641 --- /dev/null +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -0,0 +1,252 @@ +//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates n-ary add expressions and eliminates the redundancy +// exposed by the reassociation. +// +// A motivating example: +// +//   void foo(int a, int b) { +//     bar(a + b); +//     bar((a + 2) + b); +//   } +// +// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify +// the above code to +// +//   int t = a + b; +//   bar(t); +//   bar(t + 2); +// +// However, the Reassociate pass is unable to do that because it processes each +// instruction individually and believes (a + 2) + b is the best form according +// to its rank system. +// +// To address this limitation, NaryReassociate reassociates an expression in a +// form that reuses existing instructions. As a result, NaryReassociate can +// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that +// (a + b) is computed before. +// +// NaryReassociate works as follows. For every instruction in the form of (a + +// b) + c, it checks whether a + c or b + c is already computed by a dominating +// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + +// c) + a and removes the redundancy accordingly. To efficiently look up whether +// an expression is computed before, we store each instruction seen and its SCEV +// into an SCEV-to-instruction map. +// +// Although the algorithm pattern-matches only ternary additions, it +// automatically handles many >3-ary expressions by walking through the function +// in the depth-first order. For example, given +// +//   (a + c) + d +//   ((a + b) + c) + d +// +// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites +// ((a + c) + b) + d into ((a + c) + d) + b. +// +// Finally, the above dominator-based algorithm may need to be run multiple +// iterations before emitting optimal code. One source of this need is that we +// only split an operand when it is used only once. The above algorithm can +// eliminate an instruction and decrease the usage count of its operands. 
As a +// result, an instruction that previously had multiple uses may become a +// single-use instruction and thus eligible for split consideration. For +// example, +// +//   ac = a + c +//   ab = a + b +//   abc = ab + c +//   ab2 = ab + b +//   ab2c = ab2 + c +// +// In the first iteration, we cannot reassociate abc to ac+b because ab is used +// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a +// result, ab2 becomes dead and ab will be used only once in the second +// iteration. +// +// Limitations and TODO items: +// +// 1) We only consider n-ary adds for now. This should be extended and +// generalized. +// +// 2) Besides arithmetic operations, similar reassociation can be applied to +// GEPs. For example, if +//   X = &arr[a] +// dominates +//   Y = &arr[a + b] +// we may rewrite Y into X + b. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "nary-reassociate" + +namespace { +class NaryReassociate : public FunctionPass { +public: +  static char ID; + +  NaryReassociate(): FunctionPass(ID) { +    initializeNaryReassociatePass(*PassRegistry::getPassRegistry()); +  } + +  bool runOnFunction(Function &F) override; + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addPreserved<DominatorTreeWrapperPass>(); +    AU.addPreserved<ScalarEvolution>(); +    AU.addPreserved<TargetLibraryInfoWrapperPass>(); +    AU.addRequired<DominatorTreeWrapperPass>(); +    AU.addRequired<ScalarEvolution>(); +    AU.addRequired<TargetLibraryInfoWrapperPass>(); +    AU.setPreservesCFG(); +  } + +private: +  // Runs only one iteration of the dominator-based algorithm. See the header +  // comments for why we need multiple iterations. +  bool doOneIteration(Function &F); +  // Reassociates I to a better form. +  Instruction *tryReassociateAdd(Instruction *I); +  // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed. +  Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I); +  // Rewrites I to LHS + RHS if LHS is computed already. +  Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I); + +  DominatorTree *DT; +  ScalarEvolution *SE; +  TargetLibraryInfo *TLI; +  // A lookup table quickly telling which instructions compute the given SCEV. +  // Note that there can be multiple instructions at different locations +  // computing the same SCEV, so we map a SCEV to an instruction list.  
For +  // example, +  // +  //   if (p1) +  //     foo(a + b); +  //   if (p2) +  //     bar(a + b); +  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs; +}; +} // anonymous namespace + +char NaryReassociate::ID = 0; +INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", +                      false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation", +                    false, false) + +FunctionPass *llvm::createNaryReassociatePass() { +  return new NaryReassociate(); +} + +bool NaryReassociate::runOnFunction(Function &F) { +  if (skipOptnoneFunction(F)) +    return false; + +  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +  SE = &getAnalysis<ScalarEvolution>(); +  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + +  bool Changed = false, ChangedInThisIteration; +  do { +    ChangedInThisIteration = doOneIteration(F); +    Changed |= ChangedInThisIteration; +  } while (ChangedInThisIteration); +  return Changed; +} + +bool NaryReassociate::doOneIteration(Function &F) { +  bool Changed = false; +  SeenExprs.clear(); +  // Traverse the dominator tree in depth-first order. This order makes sure +  // all bases of a candidate are already in SeenExprs when we process it. +  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT); +       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) { +    BasicBlock *BB = Node->getBlock(); +    for (auto I = BB->begin(); I != BB->end(); ++I) { +      if (I->getOpcode() == Instruction::Add) { +        if (Instruction *NewI = tryReassociateAdd(I)) { +          Changed = true; +          SE->forgetValue(I); +          I->replaceAllUsesWith(NewI); +          RecursivelyDeleteTriviallyDeadInstructions(I, TLI); +          I = NewI; +        } +        // We should add the rewritten instruction because tryReassociateAdd may +        // have invalidated the original one. +        SeenExprs[SE->getSCEV(I)].push_back(I); +      } +    } +  } +  return Changed; +} + +Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) { +  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); +  if (auto *NewI = tryReassociateAdd(LHS, RHS, I)) +    return NewI; +  if (auto *NewI = tryReassociateAdd(RHS, LHS, I)) +    return NewI; +  return nullptr; +} + +Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS, +                                                Instruction *I) { +  Value *A = nullptr, *B = nullptr; +  // To be conservative, we reassociate I only when it is the only user of A+B. +  if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) { +    // I = (A + B) + RHS +    //   = (A + RHS) + B or (B + RHS) + A +    const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); +    const SCEV *RHSExpr = SE->getSCEV(RHS); +    if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I)) +      return NewI; +    if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I)) +      return NewI; +  } +  return nullptr; +} + +Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr, +                                                 Value *RHS, Instruction *I) { +  auto Pos = SeenExprs.find(LHSExpr); +  // Bail out if LHSExpr is not previously seen. 
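+  // (For instance, if I is (a + b) + c and we are probing for the SCEV of
+  // a + c, a miss here means no instruction seen so far computes a + c.)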
+  if (Pos == SeenExprs.end()) +    return nullptr; + +  auto &LHSCandidates = Pos->second; +  // Look for the closest dominating instruction LHS of I that computes +  // LHSExpr, and replace I with LHS + RHS. +  // +  // Because we traverse the dominator tree in pre-order, a +  // candidate that doesn't dominate the current instruction won't dominate any +  // future instruction either. Therefore, we pop it off the stack. This +  // optimization makes the algorithm O(n). +  while (!LHSCandidates.empty()) { +    Instruction *LHS = LHSCandidates.back(); +    if (DT->dominates(LHS, I)) { +      Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I); +      NewI->takeName(I); +      return NewI; +    } +    LHSCandidates.pop_back(); +  } +  return nullptr; +} diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 944725a..536f2a6 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,    BasicBlock *Current = Pred;    while (true) {      for (Instruction &I : *Current) { -      if (CallSite CS = &I) +      if (auto CS = CallSite(&I))          // Note: Technically, needing a safepoint isn't quite the right          // condition here.  We should instead be checking if the target method          // has an @@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,      // We need to stop going forward as soon as we see a call that can      // grow the stack (i.e. the call target has a non-zero frame      // size). -    if (CallSite CS = cursor) { -      (void)CS; // Silence an unused variable warning by gcc 4.8.2 +    if (CallSite(cursor)) {        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) {          // llvm.assume(...) are not really calls.          
if (II->getIntrinsicID() == Intrinsic::assume) { diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f5d21ff..ba48e0a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -17,6 +17,7 @@  #include "llvm/ADT/SetOperations.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/Dominators.h" @@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,  // Print the liveset found at the insert location  static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,                                    cl::init(false)); -static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", -                                      cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, +                                      cl::init(false));  // Print out the base pointers for debugging -static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", -                                       cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, +                                       cl::init(false)); + +#ifdef XDEBUG +static bool ClobberNonLive = true; +#else +static bool ClobberNonLive = false; +#endif +static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", +                                                  cl::location(ClobberNonLive), +                                                  cl::Hidden);  namespace {  struct RewriteStatepointsForGC : public FunctionPass { @@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",                      "Make relocations explicit at statepoints", false, false)  namespace { +struct GCPtrLivenessData { +  /// Values defined in this block. +  DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; +  /// Values used in this block (and thus live); does not include values +  /// killed within this block. +  DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; + +  /// Values live into this basic block (i.e. used by any +  /// instruction in this basic block or ones reachable from here) +  DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; + +  /// Values live out of this basic block (i.e. live into +  /// any successor block) +  DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; +}; +  // The type of the internal cache used inside the findBasePointers family  // of functions.  From the caller's perspective, this is an opaque type and  // should not be inspected. @@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord {    /// Mapping from live pointers to a base-defining-value    DenseMap<llvm::Value *, llvm::Value *> PointerToBase; -  /// Any new values which were added to the IR during base pointer analysis -  /// for this safepoint -  DenseSet<llvm::Value *> NewInsertedDefs; -    /// The *new* gc.statepoint instruction itself.  This produces the token    /// that normal path gc.relocates and the gc.result are tied to.    
Instruction *StatepointToken; @@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord {  };  } +/// Compute the live-in set for every basic block in the function +static void computeLiveInValues(DominatorTree &DT, Function &F, +                                GCPtrLivenessData &Data); + +/// Given results from the dataflow liveness computation, find the set of live +/// Values at a particular instruction. +static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, +                              StatepointLiveSetTy &out); +  // TODO: Once we can get to the GCStrategy, this becomes  // Optional<bool> isGCManagedPointer(const Value *V) const override { @@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) {    return false;  } -/// Return true if the Value is a gc reference type which is potentially used -/// after the instruction 'loc'.  This is only used with the edge reachability -/// liveness code.  Note: It is assumed the V dominates loc. -static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT, -                                LoopInfo *LI) { -  if (!isGCPointerType(V.getType())) -    return false; - -  if (V.use_empty()) -    return false; - -  // Given assumption that V dominates loc, this may be live -  return true; +// Return true if this type is one which a) is a gc pointer or contains a GC +// pointer and b) is of a type this code expects to encounter as a live value. +// (The insertion code will assert that a type which matches (a) and not (b) +// is not encountered.) +static bool isHandledGCPointerType(Type *T) { +  // We fully support gc pointers +  if (isGCPointerType(T)) +    return true; +  // We partially support vectors of gc pointers. The code will assert if it +  // can't handle something. +  if (auto VT = dyn_cast<VectorType>(T)) +    if (isGCPointerType(VT->getElementType())) +      return true; +  return false;  }  #ifndef NDEBUG -static bool isAggWhichContainsGCPtrType(Type *Ty) { +/// Returns true if this type contains a gc pointer whether we know how to +/// handle that type or not. +static bool containsGCPtrType(Type *Ty) { +  if (isGCPointerType(Ty)) +    return true;    if (VectorType *VT = dyn_cast<VectorType>(Ty))      return isGCPointerType(VT->getScalarType());    if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) -    return isGCPointerType(AT->getElementType()) || -           isAggWhichContainsGCPtrType(AT->getElementType()); +    return containsGCPtrType(AT->getElementType());    if (StructType *ST = dyn_cast<StructType>(Ty)) -    return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), -                       [](Type *SubType) { -                         return isGCPointerType(SubType) || -                                isAggWhichContainsGCPtrType(SubType); -                       }); +    return std::any_of( +        ST->subtypes().begin(), ST->subtypes().end(), +        [](Type *SubType) { return containsGCPtrType(SubType); });    return false;  } -#endif - -// Conservatively identifies any definitions which might be live at the -// given instruction. The  analysis is performed immediately before the -// given instruction. Values defined by that instruction are not considered -// live.  Values used by that instruction are considered live. 
-// -// preconditions: valid IR graph, term is either a terminator instruction or -// a call instruction, pred is the basic block of term, DT, LI are valid -// -// side effects: none, does not mutate IR -// -//  postconditions: populates liveValues as discussed above -static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, -                                   DominatorTree &DT, LoopInfo *LI, -                                   StatepointLiveSetTy &liveValues) { -  liveValues.clear(); - -  assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator()); -  Function *F = pred->getParent(); - -  auto is_live_gc_reference = -      [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); }; - -  // Are there any gc pointer arguments live over this point?  This needs to be -  // special cased since arguments aren't defined in basic blocks. -  for (Argument &arg : F->args()) { -    assert(!isAggWhichContainsGCPtrType(arg.getType()) && -           "support for FCA unimplemented"); - -    if (is_live_gc_reference(arg)) { -      liveValues.insert(&arg); -    } -  } - -  // Walk through all dominating blocks - the ones which can contain -  // definitions used in this block - and check to see if any of the values -  // they define are used in locations potentially reachable from the -  // interesting instruction. -  BasicBlock *BBI = pred; -  while (true) { -    if (TraceLSP) { -      errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n"; -    } -    assert(DT.dominates(BBI, pred)); -    assert(isPotentiallyReachable(BBI, pred, &DT) && -           "dominated block must be reachable"); - -    // Walk through the instructions in dominating blocks and keep any -    // that have a use potentially reachable from the block we're -    // considering putting the safepoint in -    for (Instruction &inst : *BBI) { -      if (TraceLSP) { -        errs() << "[LSP] Looking at instruction "; -        inst.dump(); -      } - -      if (pred == BBI && (&inst) == term) { -        if (TraceLSP) { -          errs() << "[LSP] stopped because we encountered the safepoint " -                    "instruction.\n"; -        } - -        // If we're in the block which defines the interesting instruction, -        // we don't want to include any values as live which are defined -        // _after_ the interesting line or as part of the line itself -        // i.e. "term" is the call instruction for a call safepoint, the -        // results of the call should not be considered live in that stackmap -        break; -      } - -      assert(!isAggWhichContainsGCPtrType(inst.getType()) && -             "support for FCA unimplemented"); - -      if (is_live_gc_reference(inst)) { -        if (TraceLSP) { -          errs() << "[LSP] found live value for this safepoint "; -          inst.dump(); -          term->dump(); -        } -        liveValues.insert(&inst); -      } -    } -    if (!DT.getNode(BBI)->getIDom()) { -      assert(BBI == &F->getEntryBlock() && -             "failed to find a dominator for something other than " -             "the entry block"); -      break; -    } -    BBI = DT.getNode(BBI)->getIDom()->getBlock(); -  } +// Returns true if this is a type which a) is a gc pointer or contains a GC +// pointer and b) is of a type which the code doesn't expect (i.e. first class +// aggregates).  Used to trip assertions. 
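+// For example, a first class aggregate type such as {i8 addrspace(1)*, i64}
+// (a hypothetical illustration) contains a gc pointer but is not something
+// the rewriting code below knows how to relocate, so it would be flagged here.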
+static bool isUnhandledGCPointerType(Type *Ty) { +  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);  } +#endif  static bool order_by_name(llvm::Value *a, llvm::Value *b) {    if (a->hasName() && b->hasName()) { @@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {    }  } -/// Find the initial live set. Note that due to base pointer -/// insertion, the live set may be incomplete. -static void -analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, -                          PartiallyConstructedSafepointRecord &result) { +// Conservatively identifies any definitions which might be live at the +// given instruction. The analysis is performed immediately before the +// given instruction. Values defined by that instruction are not considered +// live.  Values used by that instruction are considered live. +static void analyzeParsePointLiveness( +    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, +    const CallSite &CS, PartiallyConstructedSafepointRecord &result) {    Instruction *inst = CS.getInstruction(); -  BasicBlock *BB = inst->getParent();    StatepointLiveSetTy liveset; -  findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset); +  findLiveSetAtInst(inst, OriginalLivenessData, liveset);    if (PrintLiveSet) {      // Note: This output is used by several of the test cases @@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,    result.liveset = liveset;  } -/// True iff this value is the null pointer constant (of any pointer type) -static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) { -  return isa<Constant>(V) && isa<PointerType>(V->getType()) && -         cast<Constant>(V)->isNullValue(); +/// If we can trivially determine that this vector contains only base pointers, +/// return the base instruction. +static Value *findBaseOfVector(Value *I) { +  assert(I->getType()->isVectorTy() && +         cast<VectorType>(I->getType())->getElementType()->isPointerTy() && +         "Illegal to ask for the base pointer of a non-pointer type"); + +  // Each case parallels findBaseDefiningValue below; see that code for +  // detailed motivation. + +  if (isa<Argument>(I)) +    // An incoming argument to the function is a base pointer +    return I; + +  // We shouldn't see the address of a global as a vector value? +  assert(!isa<GlobalVariable>(I) && +         "unexpected global variable found in base of vector"); + +  // Inlining could possibly introduce a phi node that contains +  // undef if the callee has multiple returns +  if (isa<UndefValue>(I)) +    // utterly meaningless, but useful for dealing with partially optimized +    // code. +    return I; + +  // Due to inheritance, this must be _after_ the global variable and undef +  // checks +  if (Constant *Con = dyn_cast<Constant>(I)) { +    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && +           "order of checks wrong!"); +    assert(Con->isNullValue() && "null is the only case which makes sense"); +    return Con; +  } + +  if (isa<LoadInst>(I)) +    return I; + +  // Note: This code is currently rather incomplete.  We are essentially only +  // handling cases where the vector element is trivially a base pointer.  We +  // need to update the entire base pointer construction algorithm to know how +  // to track vector elements and potentially scalarize, but the case which +  // would motivate the work hasn't shown up in real workloads yet. 
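+  // (Hypothetical illustration: a vector built by a shufflevector or a phi of
+  // two derived-pointer vectors has no single trivially identifiable base,
+  // so it would fall through to the unreachable below.)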
+  llvm_unreachable("no base found for vector element");  }  /// Helper function for findBasePointer - Will return a value which either a) @@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) {    assert(I->getType()->isPointerTy() &&           "Illegal to ask for the base pointer of a non-pointer type"); -  // There are instructions which can never return gc pointer values.  Sanity -  // check -  // that this is actually true. -  assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && -         !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers"); -  assert((!isa<Instruction>(I) || isa<InvokeInst>(I) || -          !cast<Instruction>(I)->isTerminator()) && -         "With the exception of invoke terminators don't define values"); -  assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) && -         "Can't be definitions to start with"); -  assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) && -         "Comparisons don't give ops"); -  // There's a bunch of instructions which just don't make sense to apply to -  // a pointer.  The only valid reason for this would be pointer bit -  // twiddling which we're just not going to support. -  assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) && -         "Binary ops on pointer values are meaningless.  Unless your " -         "bit-twiddling which we don't support"); - -  if (Argument *Arg = dyn_cast<Argument>(I)) { +  // This case is a bit of a hack - it only handles extracts from vectors which +  // trivially contain only base pointers.  See note inside the function for +  // how to improve this. +  if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { +    Value *VectorOperand = EEI->getVectorOperand(); +    Value *VectorBase = findBaseOfVector(VectorOperand); +    (void)VectorBase; +    assert(VectorBase && "extract element not known to be a trivial base"); +    return EEI; +  } + +  if (isa<Argument>(I))      // An incoming argument to the function is a base pointer      // We should have never reached here if this argument isn't a gc value -    assert(Arg->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return Arg; -  } +    return I; -  if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) { +  if (isa<GlobalVariable>(I))      // base case -    assert(global->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return global; -  } +    return I;    // Inlining could possibly introduce a phi node that contains    // undef if the callee has multiple returns -  if (UndefValue *undef = dyn_cast<UndefValue>(I)) { -    assert(undef->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return undef; // utterly meaningless, but useful for dealing with -                  // partially optimized code. -  } +  if (isa<UndefValue>(I)) +    // utterly meaningless, but useful for dealing with +    // partially optimized code. +    return I;    // Due to inheritance, this must be _after_ the global variable and undef    // checks -  if (Constant *con = dyn_cast<Constant>(I)) { +  if (Constant *Con = dyn_cast<Constant>(I)) {      assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&             "order of checks wrong!");      // Note: Finding a constant base for something marked for relocation @@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) {      // off a potentially null value and have proven it null.  
We also use      // null pointers in dead paths of relocation phis (which we might later      // want to find a base pointer for). -    assert(con->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    assert(con->isNullValue() && "null is the only case which makes sense"); -    return con; +    assert(isa<ConstantPointerNull>(Con) && +           "null is the only case which makes sense"); +    return Con;    }    if (CastInst *CI = dyn_cast<CastInst>(I)) { -    Value *def = CI->stripPointerCasts(); -    assert(def->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); +    Value *Def = CI->stripPointerCasts();      // If we find a cast instruction here, it means we've found a cast which is      // not simply a pointer cast (i.e. an inttoptr).  We don't know how to      // handle int->ptr conversion. -    assert(!isa<CastInst>(def) && "shouldn't find another cast here"); -    return findBaseDefiningValue(def); +    assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); +    return findBaseDefiningValue(Def);    } -  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { -    if (LI->getType()->isPointerTy()) { -      Value *Op = LI->getOperand(0); -      (void)Op; -      // Has to be a pointer to an gc object, or possibly an array of such? -      assert(Op->getType()->isPointerTy()); -      return LI; // The value loaded is an gc base itself -    } -  } -  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { -    Value *Op = GEP->getOperand(0); -    if (Op->getType()->isPointerTy()) { -      return findBaseDefiningValue(Op); // The base of this GEP is the base -    } -  } +  if (isa<LoadInst>(I)) +    return I; // The value loaded is a gc base itself -  if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) { -    // An alloca represents a conceptual stack slot.  It's the slot itself -    // that the GC needs to know about, not the value in the slot. -    assert(alloc->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return alloc; -  } +  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) +    // The base of this GEP is the base +    return findBaseDefiningValue(GEP->getPointerOperand());    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {      switch (II->getIntrinsicID()) { +    case Intrinsic::experimental_gc_result_ptr:      default:        // fall through to general call handling        break; @@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) {      case Intrinsic::experimental_gc_result_float:      case Intrinsic::experimental_gc_result_int:        llvm_unreachable("these don't produce pointers"); -    case Intrinsic::experimental_gc_result_ptr: -      // This is just a special case of the CallInst check below to handle a -      // statepoint with deopt args which hasn't been rewritten for GC yet. -      // TODO: Assert that the statepoint isn't rewritten yet. -      return II;      case Intrinsic::experimental_gc_relocate: {        // Rerunning safepoint insertion after safepoints are already        // inserted is not supported.  It could probably be made to work, @@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) {    // We assume that functions in the source language only return base    // pointers.  This should probably be generalized via attributes to support    // both source language and internal functions. 
-  if (CallInst *call = dyn_cast<CallInst>(I)) { -    assert(call->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return call; -  } -  if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { -    assert(invoke->getType()->isPointerTy() && -           "Base for pointer must be another pointer"); -    return invoke; -  } +  if (isa<CallInst>(I) || isa<InvokeInst>(I)) +    return I;    // I have absolutely no idea how to implement this part yet.  It's not    // necessarily hard, I just haven't really looked at it yet.    assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); -  if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) { +  if (isa<AtomicCmpXchgInst>(I))      // A CAS is effectively an atomic store and load combined under a      // predicate.  From the perspective of base pointers, we just treat it -    // like a load.  We loaded a pointer from a address in memory, that value -    // had better be a valid base pointer. -    return cas->getPointerOperand(); -  } -  if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) { -    assert(AtomicRMWInst::Xchg == atomic->getOperation() && -           "All others are binary ops which don't apply to base pointers"); -    // semantically, a load, store pair.  Treat it the same as a standard load -    return atomic->getPointerOperand(); -  } +    // like a load. +    return I; + +  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " +                                   "binary ops which don't apply to pointers");    // The aggregate ops.  Aggregates can either be in the heap or on the    // stack, but in either case, this is simply a field load.  As a result,    // this is a defining definition of the base just like a load is. -  if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) { -    return ev; -  } +  if (isa<ExtractValueInst>(I)) +    return I;    // We should never see an insert vector since that would require we be    // tracing back a struct value not a pointer value. @@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) {    // return a value which dynamically selects from among several base    // derived pointers (each with its own base potentially).  It's the job of    // the caller to resolve these. -  if (SelectInst *select = dyn_cast<SelectInst>(I)) { -    return select; -  } - -  return cast<PHINode>(I); +  assert((isa<SelectInst>(I) || isa<PHINode>(I)) && +         "missing instruction case in findBaseDefiningValue"); +  return I;  }  /// Returns the base defining value for this value. -static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { -  Value *&Cached = cache[I]; +static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { +  Value *&Cached = Cache[I];    if (!Cached) {      Cached = findBaseDefiningValue(I);    } -  assert(cache[I] != nullptr); +  assert(Cache[I] != nullptr);    if (TraceLSP) { -    errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() +    dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()             << "\n";    }    return Cached; @@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {  /// Return a base pointer for this value if known.  Otherwise, return its  /// base defining value. 
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) { -  Value *def = findBaseDefiningValueCached(I, cache); -  auto Found = cache.find(def); -  if (Found != cache.end()) { +static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { +  Value *Def = findBaseDefiningValueCached(I, Cache); +  auto Found = Cache.find(Def); +  if (Found != Cache.end()) {      // Either a base-of relation, or a self reference.  Caller must check.      return Found->second;    }    // Only a BDV available -  return def; +  return Def;  }  /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,  /// is it known to be a base pointer?  Or do we need to continue searching. -static bool isKnownBaseResult(Value *v) { -  if (!isa<PHINode>(v) && !isa<SelectInst>(v)) { +static bool isKnownBaseResult(Value *V) { +  if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {      // no recursion possible      return true;    } -  if (cast<Instruction>(v)->getMetadata("is_base_value")) { +  if (isa<Instruction>(V) && +      cast<Instruction>(V)->getMetadata("is_base_value")) {      // This is a previously inserted base phi or select.  We know      // that this is a base value.      return true; @@ -647,8 +580,7 @@ private:  /// from.  For gc objects, this is simply itself.  On success, returns a value  /// which is the base pointer.  (This is reliable and can be used for  /// relocation.)  On failure, returns nullptr. -static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, -                              DenseSet<llvm::Value *> &NewInsertedDefs) { +static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {    Value *def = findBaseOrBDV(I, cache);    if (isKnownBaseResult(def)) { @@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,      done = true;      // Since we're adding elements to 'states' as we run, we can't keep      // iterators into the set. -    SmallVector<Value*, 16> Keys; +    SmallVector<Value *, 16> Keys;      Keys.reserve(states.size());      for (auto Pair : states) {        Value *V = Pair.first; @@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,    // We want to keep naming deterministic in the loop that follows, so    // sort the keys before iteration.  This is useful in allowing us to    // write stable tests. Note that there is no invalidation issue here. 
-  SmallVector<Value*, 16> Keys; +  SmallVector<Value *, 16> Keys;    Keys.reserve(states.size());    for (auto Pair : states) {      Value *V = Pair.first; @@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,      assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");      if (!state.isConflict())        continue; -     +      if (isa<PHINode>(v)) {        int num_preds =            std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));        assert(num_preds > 0 && "how did we reach here");        PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); -      NewInsertedDefs.insert(phi);        // Add metadata marking this as a base value        auto *const_1 = ConstantInt::get(            Type::getInt32Ty( @@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,        UndefValue *undef = UndefValue::get(sel->getType());        SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,                                                 undef, "base_select", sel); -      NewInsertedDefs.insert(basesel);        // Add metadata marking this as a base value        auto *const_1 = ConstantInt::get(            Type::getInt32Ty( @@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,      assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");      if (!state.isConflict())        continue; -     +      if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {        PHINode *phi = cast<PHINode>(v);        unsigned NumPHIValues = phi->getNumIncomingValues(); @@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,              assert(states.count(base));              base = states[base].getBase();              assert(base != nullptr && "unknown PhiState!"); -            assert(NewInsertedDefs.count(base) && -                   "should have already added this in a prev. iteration!");            }            // In essence this assert states: the only way two @@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,          if (base->getType() != basephi->getType()) {            base = new BitCastInst(base, basephi->getType(), "cast",                                   InBB->getTerminator()); -          NewInsertedDefs.insert(base);          }          basephi->addIncoming(base, InBB);        } @@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,          // The cast is needed since base traversal may strip away bitcasts          if (base->getType() != basesel->getType()) {            base = new BitCastInst(base, basesel->getType(), "cast", basesel); -          NewInsertedDefs.insert(base);          }          basesel->setOperand(i, base);        } @@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,  // post condition: PointerToBase contains one (derived, base) pair for every  // pointer in live.  Note that derived can be equal to base if the original  // pointer was a base pointer. 
-static void findBasePointers(const StatepointLiveSetTy &live, -                             DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, -                             DominatorTree *DT, DefiningValueMapTy &DVCache, -                             DenseSet<llvm::Value *> &NewInsertedDefs) { +static void +findBasePointers(const StatepointLiveSetTy &live, +                 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, +                 DominatorTree *DT, DefiningValueMapTy &DVCache) {    // For the naming of values inserted to be deterministic - which makes for    // much cleaner and more stable tests - we need to assign an order to the    // live values.  DenseSets do not provide a deterministic order across runs. -  SmallVector<Value*, 64> Temp; +  SmallVector<Value *, 64> Temp;    Temp.insert(Temp.end(), live.begin(), live.end());    std::sort(Temp.begin(), Temp.end(), order_by_name);    for (Value *ptr : Temp) { -    Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); +    Value *base = findBasePointer(ptr, DVCache);      assert(base && "failed to find base pointer");      PointerToBase[ptr] = base;      assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || @@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live,      // If you see this trip and like to live really dangerously, the code should      // be correct, just with idioms the verifier can't handle.  You can try      // disabling the verifier at your own substantial risk. -    assert(!isNullConstant(base) && "the relocation code needs adjustment to " -                                    "handle the relocation of a null pointer " -                                    "constant without causing false positives " -                                    "in the safepoint ir verifier."); +    assert(!isa<ConstantPointerNull>(base) && +           "the relocation code needs adjustment to handle the relocation of " +           "a null pointer constant without causing false positives in the " +           "safepoint ir verifier.");    }  } @@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,                               const CallSite &CS,                               PartiallyConstructedSafepointRecord &result) {    DenseMap<llvm::Value *, llvm::Value *> PointerToBase; -  DenseSet<llvm::Value *> NewInsertedDefs; -  findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); +  findBasePointers(result.liveset, PointerToBase, &DT, DVCache);    if (PrintBasePointers) {      // Note: Need to print these in a stable order since this is checked in      // some tests.      
errs() << "Base Pairs (w/o Relocation):\n"; -    SmallVector<Value*, 64> Temp; +    SmallVector<Value *, 64> Temp;      Temp.reserve(PointerToBase.size());      for (auto Pair : PointerToBase) {        Temp.push_back(Pair.first); @@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,      std::sort(Temp.begin(), Temp.end(), order_by_name);      for (Value *Ptr : Temp) {        Value *Base = PointerToBase[Ptr]; -      errs() << " derived %" << Ptr->getName() << " base %" -             << Base->getName() << "\n"; +      errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() +             << "\n";      }    }    result.PointerToBase = PointerToBase; -  result.NewInsertedDefs = NewInsertedDefs;  } -/// Check for liveness of items in the insert defs and add them to the live -/// and base pointer sets -static void fixupLiveness(DominatorTree &DT, const CallSite &CS, -                          const DenseSet<Value *> &allInsertedDefs, -                          PartiallyConstructedSafepointRecord &result) { -  Instruction *inst = CS.getInstruction(); - -  auto liveset = result.liveset; -  auto PointerToBase = result.PointerToBase; +/// Given an updated version of the dataflow liveness results, update the +/// liveset and base pointer maps for the call site CS. +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, +                                  const CallSite &CS, +                                  PartiallyConstructedSafepointRecord &result); -  auto is_live_gc_reference = -      [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); }; - -  // For each new definition, check to see if a) the definition dominates the -  // instruction we're interested in, and b) one of the uses of that definition -  // is edge-reachable from the instruction we're interested in.  This is the -  // same definition of liveness we used in the intial liveness analysis -  for (Value *newDef : allInsertedDefs) { -    if (liveset.count(newDef)) { -      // already live, no action needed -      continue; -    } - -    // PERF: Use DT to check instruction domination might not be good for -    // compilation time, and we could change to optimal solution if this -    // turn to be a issue -    if (!DT.dominates(cast<Instruction>(newDef), inst)) { -      // can't possibly be live at inst -      continue; -    } - -    if (is_live_gc_reference(*newDef)) { -      // Add the live new defs into liveset and PointerToBase -      liveset.insert(newDef); -      PointerToBase[newDef] = newDef; -    } -  } - -  result.liveset = liveset; -  result.PointerToBase = PointerToBase; -} - -static void fixupLiveReferences( -    Function &F, DominatorTree &DT, Pass *P, -    const DenseSet<llvm::Value *> &allInsertedDefs, -    ArrayRef<CallSite> toUpdate, +static void recomputeLiveInValues( +    Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,      MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { +  // TODO-PERF: reuse the original liveness, then simply run the dataflow +  // again.  The old values are still live and will help it stablize quickly. 
+  GCPtrLivenessData RevisedLivenessData; +  computeLiveInValues(DT, F, RevisedLivenessData);    for (size_t i = 0; i < records.size(); i++) {      struct PartiallyConstructedSafepointRecord &info = records[i];      const CallSite &CS = toUpdate[i]; -    fixupLiveness(DT, CS, allInsertedDefs, info); +    recomputeLiveInValues(RevisedLivenessData, CS, info);    }  } -// Normalize basic block to make it ready to be target of invoke statepoint. -// It means spliting it to have single predecessor. Return newly created BB -// ready to be successor of invoke statepoint. -static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB, -                                                 BasicBlock *InvokeParent, -                                                 Pass *P) { -  BasicBlock *ret = BB; - +// When inserting gc.relocate calls, we need to ensure there are no uses +// of the original value between the gc.statepoint and the gc.relocate call. +// One case which can arise is a phi node at the start of one of the successor +// blocks. +// We also need to be able to insert the gc.relocates only on the path which +// goes through the statepoint.  We might need to split an edge to make this +// possible. +static BasicBlock * +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { +  DominatorTree *DT = nullptr; +  if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) +    DT = &DTP->getDomTree(); + +  BasicBlock *Ret = BB;    if (!BB->getUniquePredecessor()) { -    ret = SplitBlockPredecessors(BB, InvokeParent, ""); +    Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT);    } -  // Another requirement for such basic blocks is to not have any phi nodes. -  // Since we just ensured that new BB will have single predecessor, -  // all phi nodes in it will have one value. Here it would be naturall place -  // to -  // remove them all. But we can not do this because we are risking to remove -  // one of the values stored in liveset of another statepoint. We will do it -  // later after placing all safepoints. +  // Now that 'Ret' has a unique predecessor, we can safely remove all phi +  // nodes from it +  FoldSingleEntryPHINodes(Ret); +  assert(!isa<PHINode>(Ret->begin())); -  return ret; +  // At this point, we can safely insert a gc.relocate as the first instruction +  // in Ret if needed. +  return Ret;  }  static int find_index(ArrayRef<Value *> livevec, Value *val) { @@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,      // combination.  This results in some blowup in the function declarations in      // the IR, but removes the need for argument bitcasts which shrinks the IR      // greatly and makes it much more readable. 
-    SmallVector<Type *, 1> types;                    // one per 'any' type +    SmallVector<Type *, 1> types;                 // one per 'any' type      types.push_back(liveVariables[i]->getType()); // result type      Value *gc_relocate_decl = Intrinsic::getDeclaration(          M, Intrinsic::experimental_gc_relocate, types); @@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */      token = invoke;      // Generate gc relocates in exceptional path -    BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint( -        toReplace->getUnwindDest(), invoke->getParent(), P); +    BasicBlock *unwindBlock = toReplace->getUnwindDest(); +    assert(!isa<PHINode>(unwindBlock->begin()) && +           unwindBlock->getUniquePredecessor() && +           "can't safely insert in this block!");      Instruction *IP = &*(unwindBlock->getFirstInsertionPt());      Builder.SetInsertPoint(IP); @@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */                              exceptional_token, Builder);      // Generate gc relocates and returns for normal block -    BasicBlock *normalDest = normalizeBBForInvokeSafepoint( -        toReplace->getNormalDest(), invoke->getParent(), P); +    BasicBlock *normalDest = toReplace->getNormalDest(); +    assert(!isa<PHINode>(normalDest->begin()) && +           normalDest->getUniquePredecessor() && +           "can't safely insert in this block!");      IP = &*(normalDest->getFirstInsertionPt());      Builder.SetInsertPoint(IP); @@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */    // Take the name of the original value call if it had one.    token->takeName(CS.getInstruction()); -  // The GCResult is already inserted, we just need to find it +// The GCResult is already inserted, we just need to find it  #ifndef NDEBUG    Instruction *toReplace = CS.getInstruction();    assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && @@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */    // Second, create a gc.relocate for every live variable    CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); -  }  namespace { @@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,  // Replace an existing gc.statepoint with a new one and a set of gc.relocates  // which make the relocations happening at this safepoint explicit. -//  +//  // WARNING: Does not do any fixup to adjust users of the original live  // values.  That's the caller's responsibility.  static void @@ -1458,14 +1355,13 @@ static void relocationViaAlloca(      Function &F, DominatorTree &DT, ArrayRef<Value *> live,      ArrayRef<struct PartiallyConstructedSafepointRecord> records) {  #ifndef NDEBUG -  int initialAllocaNum = 0; - -  // record initial number of allocas -  for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; -       itr++) { -    if (isa<AllocaInst>(*itr)) -      initialAllocaNum++; -  } +  // Record the initial number of (static) allocas; we'll check that we have +  // the same number when we are done. +  int InitialAllocaNum = 0; +  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; +       I++) +    if (isa<AllocaInst>(*I)) +      InitialAllocaNum++;  #endif    // TODO-PERF: change data structures, reserve @@ -1505,47 +1401,49 @@ static void relocationViaAlloca(      // If it was an invoke statepoint,      // we will insert stores for the exceptional path gc relocates.      
if (isa<InvokeInst>(Statepoint)) { -      insertRelocationStores(info.UnwindToken->users(), -                             allocaMap, visitedLiveValues); +      insertRelocationStores(info.UnwindToken->users(), allocaMap, +                             visitedLiveValues);      } -#ifndef NDEBUG -    // As a debuging aid, pretend that an unrelocated pointer becomes null at -    // the gc.statepoint.  This will turn some subtle GC problems into slightly -    // easier to debug SEGVs -    SmallVector<AllocaInst *, 64> ToClobber; -    for (auto Pair : allocaMap) { -      Value *Def = Pair.first; -      AllocaInst *Alloca = cast<AllocaInst>(Pair.second); - -      // This value was relocated -      if (visitedLiveValues.count(Def)) { -        continue; +    if (ClobberNonLive) { +      // As a debugging aid, pretend that an unrelocated pointer becomes null at +      // the gc.statepoint.  This will turn some subtle GC problems into +      // slightly easier to debug SEGVs.  Note that on large IR files with +      // lots of gc.statepoints this is extremely costly in both memory and +      // time. +      SmallVector<AllocaInst *, 64> ToClobber; +      for (auto Pair : allocaMap) { +        Value *Def = Pair.first; +        AllocaInst *Alloca = cast<AllocaInst>(Pair.second); + +        // This value was relocated +        if (visitedLiveValues.count(Def)) { +          continue; +        } +        ToClobber.push_back(Alloca);        } -      ToClobber.push_back(Alloca); -    } -    auto InsertClobbersAt = [&](Instruction *IP) { -      for (auto *AI : ToClobber) { -        auto AIType = cast<PointerType>(AI->getType()); -        auto PT = cast<PointerType>(AIType->getElementType()); -        Constant *CPN = ConstantPointerNull::get(PT); -        StoreInst *store = new StoreInst(CPN, AI); -        store->insertBefore(IP); -      } -    }; +      auto InsertClobbersAt = [&](Instruction *IP) { +        for (auto *AI : ToClobber) { +          auto AIType = cast<PointerType>(AI->getType()); +          auto PT = cast<PointerType>(AIType->getElementType()); +          Constant *CPN = ConstantPointerNull::get(PT); +          StoreInst *store = new StoreInst(CPN, AI); +          store->insertBefore(IP); +        } +      }; -    // Insert the clobbering stores.  These may get intermixed with the -    // gc.results and gc.relocates, but that's fine.   -    if (auto II = dyn_cast<InvokeInst>(Statepoint)) { -      InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); -      InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); -    } else { -      BasicBlock::iterator Next(cast<CallInst>(Statepoint)); -      Next++; -      InsertClobbersAt(Next); +      // Insert the clobbering stores.  These may get intermixed with the +      // gc.results and gc.relocates, but that's fine. 
+      if (auto II = dyn_cast<InvokeInst>(Statepoint)) { +        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); +        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); +      } else { +        BasicBlock::iterator Next(cast<CallInst>(Statepoint)); +        Next++; +        InsertClobbersAt(Next); +      }      } -#endif    }    // Update uses with loads from the allocas, and add stores for the gc_relocated values    for (auto Pair : allocaMap) { @@ -1603,11 +1501,11 @@ static void relocationViaAlloca(          assert(!inst->isTerminator() &&                 "The only TerminatorInst that can produce a value is "                 "InvokeInst which is handled above."); -         store->insertAfter(inst); +        store->insertAfter(inst);        }      } else {        assert((isa<Argument>(def) || isa<GlobalVariable>(def) || -              (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && +              isa<ConstantPointerNull>(def)) &&               "Must be argument or global");        store->insertAfter(cast<Instruction>(alloca));      } @@ -1621,12 +1519,11 @@ static void relocationViaAlloca(    }  #ifndef NDEBUG -  for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; -       itr++) { -    if (isa<AllocaInst>(*itr)) -      initialAllocaNum--; -  } -  assert(initialAllocaNum == 0 && "We must not introduce any extra allocas"); +  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; +       I++) +    if (isa<AllocaInst>(*I)) +      InitialAllocaNum--; +  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");  #endif  } @@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {    }  } -static Function *getUseHolder(Module &M) { -  FunctionType *ftype = -      FunctionType::get(Type::getVoidTy(M.getContext()), true); -  Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype)); -  return Func; -} -  /// Insert holders so that each Value is obviously live through the entire -/// liftetime of the call. +/// lifetime of the call.  
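+/// (Illustrative sketch: for a live gc pointer %p, the holder is roughly
+///   call void (...)* @__tmp_use(i8 addrspace(1)* %p)
+/// where @__tmp_use is the dummy vararg function created below; the holders
+/// are deleted again once liveness has been recorded.)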
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values, -                                 SmallVectorImpl<CallInst *> &holders) { +                                 SmallVectorImpl<CallInst *> &Holders) { +  if (Values.empty()) +    // No values to hold live, might as well not insert the empty holder +    return; +    Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); -  Function *Func = getUseHolder(*M); +  // Use a dummy vararg function to actually hold the values live +  Function *Func = cast<Function>(M->getOrInsertFunction( +      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));    if (CS.isCall()) {      // For call safepoints insert dummy calls right after safepoint -    BasicBlock::iterator next(CS.getInstruction()); -    next++; -    CallInst *base_holder = CallInst::Create(Func, Values, "", next); -    holders.push_back(base_holder); -  } else if (CS.isInvoke()) { -    // For invoke safepooints insert dummy calls both in normal and -    // exceptional destination blocks -    InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); -    CallInst *normal_holder = CallInst::Create( -        Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt()); -    CallInst *unwind_holder = CallInst::Create( -        Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt()); -    holders.push_back(normal_holder); -    holders.push_back(unwind_holder); -  } else -    llvm_unreachable("unsupported call type"); +    BasicBlock::iterator Next(CS.getInstruction()); +    Next++; +    Holders.push_back(CallInst::Create(Func, Values, "", Next)); +    return; +  } +  // For invoke safepoints insert dummy calls in both the normal and +  // exceptional destination blocks +  auto *II = cast<InvokeInst>(CS.getInstruction()); +  Holders.push_back(CallInst::Create( +      Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); +  Holders.push_back(CallInst::Create( +      Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));  }  static void findLiveReferences(      Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,      MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { +  GCPtrLivenessData OriginalLivenessData; +  computeLiveInValues(DT, F, OriginalLivenessData);    for (size_t i = 0; i < records.size(); i++) {      struct PartiallyConstructedSafepointRecord &info = records[i];      const CallSite &CS = toUpdate[i]; -    analyzeParsePointLiveness(DT, CS, info); +    analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);    }  } -static void addBasesAsLiveValues(StatepointLiveSetTy &liveset, -                                 DenseMap<Value *, Value *> &PointerToBase) { -  // Identify any base pointers which are used in this safepoint, but not -  // themselves relocated.  We need to relocate them so that later inserted -  // safepoints can get the properly relocated base register. -  DenseSet<Value *> missing; -  for (Value *L : liveset) { -    assert(PointerToBase.find(L) != PointerToBase.end()); -    Value *base = PointerToBase[L]; -    assert(base); -    if (liveset.find(base) == liveset.end()) { -      assert(PointerToBase.find(base) == PointerToBase.end()); -      // uniqued by set insert -      missing.insert(base); +/// Remove any vector of pointers from the liveset by scalarizing them over the +/// statepoint instruction.  Adds the scalarized pieces to the liveset.  
It +/// would be preferable to include the vector in the statepoint itself, but +/// the lowering code currently does not handle that.  Extending it would be +/// slightly non-trivial since it requires a format change.  Given how rare +/// such cases are (for the moment?), scalarizing is an acceptable compromise. +static void splitVectorValues(Instruction *StatepointInst, +                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) { +  SmallVector<Value *, 16> ToSplit; +  for (Value *V : LiveSet) +    if (isa<VectorType>(V->getType())) +      ToSplit.push_back(V); + +  if (ToSplit.empty()) +    return; + +  Function &F = *(StatepointInst->getParent()->getParent()); + +  DenseMap<Value *, AllocaInst *> AllocaMap; +  // First is normal return, second is exceptional return (invoke only) +  DenseMap<Value *, std::pair<Value *, Value *>> Replacements; +  for (Value *V : ToSplit) { +    LiveSet.erase(V); + +    AllocaInst *Alloca = +        new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); +    AllocaMap[V] = Alloca; + +    VectorType *VT = cast<VectorType>(V->getType()); +    IRBuilder<> Builder(StatepointInst); +    SmallVector<Value *, 16> Elements; +    for (unsigned i = 0; i < VT->getNumElements(); i++) +      Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); +    LiveSet.insert(Elements.begin(), Elements.end()); + +    auto InsertVectorReform = [&](Instruction *IP) { +      Builder.SetInsertPoint(IP); +      Builder.SetCurrentDebugLocation(IP->getDebugLoc()); +      Value *ResultVec = UndefValue::get(VT); +      for (unsigned i = 0; i < VT->getNumElements(); i++) +        ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], +                                                Builder.getInt32(i)); +      return ResultVec; +    }; + +    if (isa<CallInst>(StatepointInst)) { +      BasicBlock::iterator Next(StatepointInst); +      Next++; +      Instruction *IP = &*(Next); +      Replacements[V].first = InsertVectorReform(IP); +      Replacements[V].second = nullptr; +    } else { +      InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); +      // We've already normalized - check that we don't have shared destination +      // blocks +      BasicBlock *NormalDest = Invoke->getNormalDest(); +      assert(!isa<PHINode>(NormalDest->begin())); +      BasicBlock *UnwindDest = Invoke->getUnwindDest(); +      assert(!isa<PHINode>(UnwindDest->begin())); +      // Insert insert element sequences in both successors +      Instruction *IP = &*(NormalDest->getFirstInsertionPt()); +      Replacements[V].first = InsertVectorReform(IP); +      IP = &*(UnwindDest->getFirstInsertionPt()); +      Replacements[V].second = InsertVectorReform(IP);      }    } +  for (Value *V : ToSplit) { +    AllocaInst *Alloca = AllocaMap[V]; + +    // Capture all users before we start mutating use lists +    SmallVector<Instruction *, 16> Users; +    for (User *U : V->users()) +      Users.push_back(cast<Instruction>(U)); + +    for (Instruction *I : Users) { +      if (auto Phi = dyn_cast<PHINode>(I)) { +        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) +          if (V == Phi->getIncomingValue(i)) { +            LoadInst *Load = new LoadInst( +                Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); +            Phi->setIncomingValue(i, Load); +          } +      } else { +        LoadInst *Load = new LoadInst(Alloca, "", I); +        I->replaceUsesOfWith(V, Load); +      } +    } -  // Note that we want these at the
end of the list, otherwise -  // register placement gets screwed up once we lower to STATEPOINT -  // instructions.  This is an utter hack, but there doesn't seem to be a -  // better one. -  for (Value *base : missing) { -    assert(base); -    liveset.insert(base); -    PointerToBase[base] = base; -  } -  assert(liveset.size() == PointerToBase.size()); +    // Store the original value and the replacement value into the alloca +    StoreInst *Store = new StoreInst(V, Alloca); +    if (auto I = dyn_cast<Instruction>(V)) +      Store->insertAfter(I); +    else +      Store->insertAfter(Alloca); + +    // Normal return for invoke, or call return +    Instruction *Replacement = cast<Instruction>(Replacements[V].first); +    (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); +    // Unwind return for invoke only +    Replacement = cast_or_null<Instruction>(Replacements[V].second); +    if (Replacement) +      (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); +  } + +  // apply mem2reg to promote alloca to SSA +  SmallVector<AllocaInst *, 16> Allocas; +  for (Value *V : ToSplit) +    Allocas.push_back(AllocaMap[V]); +  PromoteMemToReg(Allocas, DT);  }  static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, @@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,    }  #endif +  // When inserting gc.relocates for invokes, we need to be able to insert at +  // the top of the successor blocks.  See the comment on +  // normalForInvokeSafepoint on exactly what is needed.  Note that this step +  // may restructure the CFG. +  for (CallSite CS : toUpdate) { +    if (!CS.isInvoke()) +      continue; +    InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); +    normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), +                                P); +    normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), +                                P); +  } +    // A list of dummy calls added to the IR to keep various values obviously    // live in the IR.  We'll remove all of these when done.    SmallVector<CallInst *, 64> holders; @@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,      SmallVector<Value *, 64> DeoptValues;      for (Use &U : StatepointCS.vm_state_args()) {        Value *Arg = cast<Value>(&U); -      if (isGCPointerType(Arg->getType())) +      assert(!isUnhandledGCPointerType(Arg->getType()) && +             "support for FCA unimplemented"); +      if (isHandledGCPointerType(Arg->getType()))          DeoptValues.push_back(Arg);      }      insertUseHolderAfter(CS, DeoptValues, holders); @@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,    // site.    findLiveReferences(F, DT, P, toUpdate, records); +  // Do a limited scalarization of any live at safepoint vector values which +  // contain pointers.  This enables this pass to run after vectorization at +  // the cost of some possible performance loss.  TODO: it would be nice to +  // natively support vectors all the way through the backend so we don't need +  // to scalarize here. 
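+  // As a rough sketch (illustrative IR only): a live <2 x i8 addrspace(1)*> +  // %vec becomes +  //   %e0 = extractelement <2 x i8 addrspace(1)*> %vec, i32 0 +  //   %e1 = extractelement <2 x i8 addrspace(1)*> %vec, i32 1 +  //   ... statepoint with %e0 and %e1 live instead of %vec ... +  //   %vec.new = <insertelement sequence rebuilding the vector> +  // with the pieces routed through a temporary alloca that mem2reg promotes +  // back to SSA form once the splitting is done.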
+  for (size_t i = 0; i < records.size(); i++) { +    struct PartiallyConstructedSafepointRecord &info = records[i]; +    Instruction *statepoint = toUpdate[i].getInstruction(); +    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); +  } +    // B) Find the base pointers for each live pointer    /* scope for caching */ {      // Cache the 'defining value' relation used in the computation and @@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,    //   gep a + 1    //   safepoint 2    //   br loop -  DenseSet<llvm::Value *> allInsertedDefs; -  for (size_t i = 0; i < records.size(); i++) { -    struct PartiallyConstructedSafepointRecord &info = records[i]; -    allInsertedDefs.insert(info.NewInsertedDefs.begin(), -                           info.NewInsertedDefs.end()); -  } -    // We insert some dummy calls after each safepoint to definitely hold live    // the base pointers which were identified for that safepoint.  We'll then    // ask liveness for _every_ base inserted to see what is now live.  Then we @@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,      insertUseHolderAfter(CS, Bases, holders);    } -  // Add the bases explicitly to the live vector set.  This may result in a few -  // extra relocations, but the base has to be available whenever a pointer -  // derived from it is used.  Thus, we need it to be part of the statepoint's -  // gc arguments list.  TODO: Introduce an explicit notion (in the following -  // code) of the GC argument list as seperate from the live Values at a -  // given statepoint. -  for (size_t i = 0; i < records.size(); i++) { -    struct PartiallyConstructedSafepointRecord &info = records[i]; -    addBasesAsLiveValues(info.liveset, info.PointerToBase); -  } +  // By selecting base pointers, we've effectively inserted new uses. Thus, we +  // need to rerun liveness.  We may *also* have inserted new defs, but that's +  // not the key issue. +  recomputeLiveInValues(F, DT, P, toUpdate, records); -  // If we inserted any new values, we need to adjust our notion of what is -  // live at a particular safepoint. -  if (!allInsertedDefs.empty()) { -    fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records); -  }    if (PrintBasePointers) {      for (size_t i = 0; i < records.size(); i++) {        struct PartiallyConstructedSafepointRecord &info = records[i]; @@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,    }    toUpdate.clear(); // prevent accident use of invalid CallSites -  // In case if we inserted relocates in a different basic block than the -  // original safepoint (this can happen for invokes). We need to be sure that -  // original values were not used in any of the phi nodes at the -  // beginning of basic block containing them. Because we know that all such -  // blocks will have single predecessor we can safely assume that all phi -  // nodes have single entry (because of normalizeBBForInvokeSafepoint). -  // Just remove them all here. 
-  for (size_t i = 0; i < records.size(); i++) { -    Instruction *I = records[i].StatepointToken; - -    if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { -      FoldSingleEntryPHINodes(invoke->getNormalDest()); -      assert(!isa<PHINode>(invoke->getNormalDest()->begin())); - -      FoldSingleEntryPHINodes(invoke->getUnwindDest()); -      assert(!isa<PHINode>(invoke->getUnwindDest()->begin())); -    } -  } -    // Do all the fixups of the original live variables to their relocated selves    SmallVector<Value *, 128> live;    for (size_t i = 0; i < records.size(); i++) {      struct PartiallyConstructedSafepointRecord &info = records[i]; @@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,      Statepoint statepoint(info.StatepointToken);      live.insert(live.end(), statepoint.gc_args_begin(),                  statepoint.gc_args_end()); +#ifndef NDEBUG +    // Do some basic sanity checks on our liveness results before performing +    // relocation.  Relocation can and will turn mistakes in liveness results +    // into nonsensical code which is much harder to debug. +    // TODO: It would be nice to test consistency as well +    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) && +           "statepoint must be reachable or liveness is meaningless"); +    for (Value *V : statepoint.gc_args()) { +      if (!isa<Instruction>(V)) +        // Non-instruction values trivially dominate all possible uses +        continue; +      auto LiveInst = cast<Instruction>(V); +      assert(DT.isReachableFromEntry(LiveInst->getParent()) && +             "unreachable values should never be live"); +      assert(DT.dominates(LiveInst, info.StatepointToken) && +             "basic SSA liveness expectation violated by liveness analysis"); +    } +#endif    }    unique_unsorted(live); @@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {    if (!shouldRewriteStatepointsIn(F))      return false; -  // Gather all the statepoints which need rewritten. +  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + +  // Gather all the statepoints which need to be rewritten.  Be careful to only +  // consider those in reachable code since we need to ask dominance queries +  // when rewriting.  We'll delete the unreachable ones in a moment.    SmallVector<CallSite, 64> ParsePointNeeded; +  bool HasUnreachableStatepoint = false;    for (Instruction &I : inst_range(F)) {      // TODO: only the ones with the flag set! -    if (isStatepoint(I)) -      ParsePointNeeded.push_back(CallSite(&I)); +    if (isStatepoint(I)) { +      if (DT.isReachableFromEntry(I.getParent())) +        ParsePointNeeded.push_back(CallSite(&I)); +      else +        HasUnreachableStatepoint = true; +    }    } +  bool MadeChange = false; + +  // Delete any unreachable statepoints so that we don't have unrewritten +  // statepoints surviving this pass.  This makes testing easier and the +  // resulting IR less confusing to human readers.  Rather than be fancy, we +  // just reuse a utility function which removes the unreachable blocks. +  if (HasUnreachableStatepoint) +    MadeChange |= removeUnreachableBlocks(F); +    // Return early if no work to do.    if (ParsePointNeeded.empty()) -    return false; +    return MadeChange; + +  // As a prepass, go ahead and aggressively destroy single entry phi nodes. +  // These are created by LCSSA.  They have the effect of increasing the size +  // of liveness sets for no good reason.
It may be harder to do this post +  // insertion since relocations and base phis can confuse things. +  for (BasicBlock &BB : F) +    if (BB.getUniquePredecessor()) { +      MadeChange = true; +      FoldSingleEntryPHINodes(&BB); +    } -  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); -  return insertParsePoints(F, DT, this, ParsePointNeeded); +  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); +  return MadeChange; +} + +// liveness computation via standard dataflow +// ------------------------------------------------------------------- + +// TODO: Consider using bitvectors for liveness, the set of potentially +// interesting values should be small and easy to pre-compute. + +/// Is this value a constant consisting entirely of null values? +static bool isConstantNull(Value *V) { +  return isa<Constant>(V) && cast<Constant>(V)->isNullValue(); +} + +/// Compute the live-in set for the location rbegin starting from +/// the live-out set of the basic block. +static void computeLiveInValues(BasicBlock::reverse_iterator rbegin, +                                BasicBlock::reverse_iterator rend, +                                DenseSet<Value *> &LiveTmp) { + +  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) { +    Instruction *I = &*ritr; + +    // KILL/Def - Remove this definition from LiveIn +    LiveTmp.erase(I); + +    // Don't consider *uses* in PHI nodes, we handle their contribution to +    // predecessor blocks when we seed the LiveOut sets +    if (isa<PHINode>(I)) +      continue; + +    // USE - Add to the LiveIn set for this instruction +    for (Value *V : I->operands()) { +      assert(!isUnhandledGCPointerType(V->getType()) && +             "support for FCA unimplemented"); +      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) && +          !isa<UndefValue>(V)) { +        // The choice to exclude null and undef is arbitrary here.  Reconsider? +        LiveTmp.insert(V); +      } +    } +  } +} + +static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) { + +  for (BasicBlock *Succ : successors(BB)) { +    const BasicBlock::iterator E(Succ->getFirstNonPHI()); +    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) { +      PHINode *Phi = cast<PHINode>(&*I); +      Value *V = Phi->getIncomingValueForBlock(BB); +      assert(!isUnhandledGCPointerType(V->getType()) && +             "support for FCA unimplemented"); +      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) && +          !isa<UndefValue>(V)) { +        // The choice to exclude null and undef is arbitrary here.  Reconsider? +        LiveTmp.insert(V); +      } +    } +  } +} + +static DenseSet<Value *> computeKillSet(BasicBlock *BB) { +  DenseSet<Value *> KillSet; +  for (Instruction &I : *BB) +    if (isHandledGCPointerType(I.getType())) +      KillSet.insert(&I); +  return KillSet; +} + +#ifndef NDEBUG +/// Check that the items in 'Live' dominate 'TI'.  This is used as a basic +/// sanity check for the liveness computation. +static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live, +                          TerminatorInst *TI, bool TermOkay = false) { +  for (Value *V : Live) { +    if (auto *I = dyn_cast<Instruction>(V)) { +      // The terminator can be a member of the LiveOut set.  LLVM's definition +      // of instruction dominance states that V does not dominate itself.  As +      // such, we need to special case this to allow it.
+      if (TermOkay && TI == I) +        continue; +      assert(DT.dominates(I, TI) && +             "basic SSA liveness expectation violated by liveness analysis"); +    } +  } +} + +/// Check that all the liveness sets used during the computation of liveness +/// obey basic SSA properties.  This is useful for finding cases where we miss +/// a def. +static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data, +                          BasicBlock &BB) { +  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator()); +  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true); +  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator()); +} +#endif + +static void computeLiveInValues(DominatorTree &DT, Function &F, +                                GCPtrLivenessData &Data) { + +  SmallSetVector<BasicBlock *, 200> Worklist; +  auto AddPredsToWorklist = [&](BasicBlock *BB) { +    // We use a SetVector so that we don't have duplicates in the worklist. +    Worklist.insert(pred_begin(BB), pred_end(BB)); +  }; +  auto NextItem = [&]() { +    BasicBlock *BB = Worklist.back(); +    Worklist.pop_back(); +    return BB; +  }; + +  // Seed the liveness for each individual block +  for (BasicBlock &BB : F) { +    Data.KillSet[&BB] = computeKillSet(&BB); +    Data.LiveSet[&BB].clear(); +    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]); + +#ifndef NDEBUG +    for (Value *Kill : Data.KillSet[&BB]) +      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill"); +#endif + +    Data.LiveOut[&BB] = DenseSet<Value *>(); +    computeLiveOutSeed(&BB, Data.LiveOut[&BB]); +    Data.LiveIn[&BB] = Data.LiveSet[&BB]; +    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]); +    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]); +    if (!Data.LiveIn[&BB].empty()) +      AddPredsToWorklist(&BB); +  } + +  // Propagate that liveness until stable +  while (!Worklist.empty()) { +    BasicBlock *BB = NextItem(); + +    // Compute our new liveout set, then exit early if it hasn't changed +    // despite the contribution of our successors. +    DenseSet<Value *> LiveOut = Data.LiveOut[BB]; +    const auto OldLiveOutSize = LiveOut.size(); +    for (BasicBlock *Succ : successors(BB)) { +      assert(Data.LiveIn.count(Succ)); +      set_union(LiveOut, Data.LiveIn[Succ]); +    } +    // assert OldLiveOut is a subset of LiveOut +    if (OldLiveOutSize == LiveOut.size()) { +      // If the sets are the same size, then we didn't actually add anything +      // when unioning our successors' LiveIn.  Thus, the LiveIn of this block +      // hasn't changed. +      continue; +    } +    Data.LiveOut[BB] = LiveOut; + +    // Apply the effects of this basic block +    DenseSet<Value *> LiveTmp = LiveOut; +    set_union(LiveTmp, Data.LiveSet[BB]); +    set_subtract(LiveTmp, Data.KillSet[BB]); + +    assert(Data.LiveIn.count(BB)); +    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB]; +    // assert: OldLiveIn is a subset of LiveTmp +    if (OldLiveIn.size() != LiveTmp.size()) { +      Data.LiveIn[BB] = LiveTmp; +      AddPredsToWorklist(BB); +    } +  } // while( !worklist.empty() ) + +#ifndef NDEBUG +  // Sanity check our output against SSA properties.  This helps catch any +  // missing kills during the above iteration.
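+  // For reference, the fixed point computed above satisfies the usual +  // backwards dataflow equations, with LiveOut additionally seeded from the +  // PHI uses in successors by computeLiveOutSeed: +  //   LiveOut(BB) = union of LiveIn(Succ) over all successors Succ +  //   LiveIn(BB)  = (LiveSet(BB) | LiveOut(BB)) - KillSet(BB)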
+  for (BasicBlock &BB : F) { +    checkBasicSSA(DT, Data, BB); +  } +#endif +} + +static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, +                              StatepointLiveSetTy &Out) { + +  BasicBlock *BB = Inst->getParent(); + +  // Note: The copy is intentional and required +  assert(Data.LiveOut.count(BB)); +  DenseSet<Value *> LiveOut = Data.LiveOut[BB]; + +  // We want to handle the statepoint itself oddly.  Its +  // call result is not live (normal), nor are its arguments +  // (unless they're used again later).  This adjustment is +  // specifically what we need to relocate. +  BasicBlock::reverse_iterator rend(Inst); +  computeLiveInValues(BB->rbegin(), rend, LiveOut); +  LiveOut.erase(Inst); +  Out.insert(LiveOut.begin(), LiveOut.end()); +} + +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, +                                  const CallSite &CS, +                                  PartiallyConstructedSafepointRecord &Info) { +  Instruction *Inst = CS.getInstruction(); +  StatepointLiveSetTy Updated; +  findLiveSetAtInst(Inst, RevisedLivenessData, Updated); + +#ifndef NDEBUG +  DenseSet<Value *> Bases; +  for (auto KVPair : Info.PointerToBase) { +    Bases.insert(KVPair.second); +  } +#endif +  // We may have base pointers which are now live that weren't before.  We need +  // to update the PointerToBase structure to reflect this. +  for (auto V : Updated) +    if (!Info.PointerToBase.count(V)) { +      assert(Bases.count(V) && "can't find base for unexpected live value"); +      Info.PointerToBase[V] = V; +      continue; +    } + +#ifndef NDEBUG +  for (auto V : Updated) { +    assert(Info.PointerToBase.count(V) && +           "must be able to find base for live value"); +  } +#endif + +  // Remove any stale base mappings - this can happen since our liveness is +  // more precise than the one inherent in the base pointer analysis +  DenseSet<Value *> ToErase; +  for (auto KVPair : Info.PointerToBase) +    if (!Updated.count(KVPair.first)) +      ToErase.insert(KVPair.first); +  for (auto V : ToErase) +    Info.PointerToBase.erase(V); + +#ifndef NDEBUG +  for (auto KVPair : Info.PointerToBase) +    assert(Updated.count(KVPair.first) && "record for non-live value"); +#endif + +  Info.liveset = Updated;  } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 875a007..bc068f7 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {    Constant *Ptr = Operands[0];    auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end()); -  markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices)); +  markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, +                                                  Indices));  }  void SCCPSolver::visitStoreInst(StoreInst &SI) { diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 06b000f..59dc528 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1166,10 +1166,9 @@ public:        } else {          continue;        } -      Instruction *DbgVal = -          DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), -                                      DIExpression(DVI->getExpression()), Inst); -      DbgVal->setDebugLoc(DVI->getDebugLoc()); +      DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), +                                  
DIExpression(DVI->getExpression()), +                                  DVI->getDebugLoc(), Inst);      }    }  }; @@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,    if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())      return BasePtr; -  return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx"); +  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices, +                               NamePrefix + "sroa_idx");  }  /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,      OffsetPtr = Int8PtrOffset == 0                      ? Int8Ptr -                    : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), +                    : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, +                                            IRB.getInt(Int8PtrOffset),                                              NamePrefix + "sroa_raw_idx");    }    Ptr = OffsetPtr; @@ -3250,7 +3251,8 @@ private:      void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {        assert(Ty->isSingleValueType());        // Load the single value and insert it using the indices. -      Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); +      Value *GEP = +          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");        Value *Load = IRB.CreateLoad(GEP, Name + ".load");        Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");        DEBUG(dbgs() << "          to: " << *Load << "\n"); @@ -3283,7 +3285,7 @@ private:        // Extract the single value and store it using the indices.        Value *Store = IRB.CreateStore(            IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), -          IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); +          IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));        (void)Store;        DEBUG(dbgs() << "          to: " << *Store << "\n");      } @@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {        // Create a piece expression describing the new partition or reuse AI's        // expression if there is only one partition.        DIExpression PieceExpr = Expr; -      if (IsSplit || Expr.isBitPiece()) { +      if (IsSplit || Expr->isBitPiece()) {          // If this alloca is already a scalar replacement of a larger aggregate,          // Piece.Offset describes the offset inside the scalar. -        uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0; +        uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;          uint64_t Start = Offset + Piece.Offset;          uint64_t Size = Piece.Size; -        if (Expr.isBitPiece()) { -          uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize(); +        if (Expr->isBitPiece()) { +          uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();            if (Start >= AbsEnd)              // No need to describe a SROAed padding.              
continue; @@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {        if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))          OldDDI->eraseFromParent(); -      auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI); -      NewDDI->setDebugLoc(DbgDecl->getDebugLoc()); +      DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(), +                        &AI);      }    }    return Changed; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 3e7cf04..f99fe3f 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {  /// \returns The profiled weight of I.  unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {    DebugLoc DLoc = Inst.getDebugLoc(); -  if (DLoc.isUnknown()) +  if (!DLoc)      return 0;    unsigned Lineno = DLoc.getLine();    if (Lineno < HeaderLineno)      return 0; -  DILocation DIL(DLoc.getAsMDNode(*Ctx)); +  DILocation DIL = DLoc.get();    int LOffset = Lineno - HeaderLineno; -  unsigned Discriminator = DIL.getDiscriminator(); +  unsigned Discriminator = DIL->getDiscriminator();    unsigned Weight = Samples->samplesAt(LOffset, Discriminator);    DEBUG(dbgs() << "    " << Lineno << "." << Discriminator << ":" << Inst                 << " (line offset: " << LOffset << "." << Discriminator @@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) {  /// \returns the line number where \p F is defined. If it returns 0,  ///          it means that there is no debug information available for \p F.  unsigned SampleProfileLoader::getFunctionLoc(Function &F) { -  DISubprogram S = getDISubprogram(&F); -  if (S.isSubprogram()) -    return S.getLineNumber(); +  if (MDSubprogram *S = getDISubprogram(&F)) +    return S->getLine();    // If could not find the start of \p F, emit a diagnostic to inform the user    // about the missed opportunity. 
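// Worked example for getInstWeight above (illustrative numbers only): an // instruction whose debug location sits at line HeaderLineno + 5 with // discriminator 2 gets its weight from Samples->samplesAt(5, 2).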
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 6cc8411..42095ae 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {    initializeLowerExpectIntrinsicPass(Registry);    initializeMemCpyOptPass(Registry);    initializeMergedLoadStoreMotionPass(Registry); +  initializeNaryReassociatePass(Registry);    initializePartiallyInlineLibCallsPass(Registry);    initializeReassociatePass(Registry);    initializeRegToMemPass(Registry); @@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {    initializeLoadCombinePass(Registry);    initializePlaceBackedgeSafepointsImplPass(Registry);    initializePlaceSafepointsPass(Registry); +  initializeFloat2IntPass(Registry);  }  void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index acd8585..693c5ae 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1117,10 +1117,9 @@ public:        } else {          continue;        } -      Instruction *DbgVal = DIB->insertDbgValueIntrinsic( -          Arg, 0, DIVariable(DVI->getVariable()), -          DIExpression(DVI->getExpression()), Inst); -      DbgVal->setDebugLoc(DVI->getDebugLoc()); +      DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), +                                   DIExpression(DVI->getExpression()), +                                   DVI->getDebugLoc(), Inst);      }    }  }; @@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,      // split the alloca again later.      unsigned AS = AI->getType()->getAddressSpace();      Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); -    V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); +    V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset));      IdxTy = NewElts[Idx]->getAllocatedType();      uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index a457cba..d55dc6a 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) {        CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");      }      if (I != 0) -      CV[I] = Builder.CreateConstGEP1_32(CV[0], I, +      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,                                           V->getName() + ".i" + Twine(I));    } else {      // Search through a chain of InsertElementInsts looking for element I. diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 1a04d74..8af4753 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(          }        }        // Create an ugly GEP with a single index for each index. -      ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep"); +      ResultPtr = +          Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep");      }    }    // Create a GEP with the constant offset index.    
if (AccumulativeByteOffset != 0) {      Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); -    ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep"); +    ResultPtr = +        Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");    }    if (ResultPtr->getType() != Variadic->getType())      ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index e71031c..2fc9368 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -15,42 +15,46 @@  //  // There are many optimizations we can perform in the domain of SLSR. This file  // for now contains only an initial step. Specifically, we look for strength -// reduction candidates in two forms: +// reduction candidates in the following forms:  // -// Form 1: (B + i) * S -// Form 2: &B[i * S] +// Form 1: B + i * S +// Form 2: (B + i) * S +// Form 3: &B[i * S]  //  // where S is an integer variable, and i is a constant integer. If we found two -// candidates +// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2 +// in a simpler way with respect to S1. For example,  // -// S1: X = (B + i) * S -// S2: Y = (B + i') * S +// S1: X = B + i * S +// S2: Y = B + i' * S   => X + (i' - i) * S  // -// or +// S1: X = (B + i) * S +// S2: Y = (B + i') * S => X + (i' - i) * S  //  // S1: X = &B[i * S] -// S2: Y = &B[i' * S] -// -// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with +// S2: Y = &B[i' * S]   => &X[(i' - i) * S]  // -// Y = X + (i' - i) * S +// Note: (i' - i) * S is folded to the extent possible.  // -// or +// This rewriting is in general a good idea. The code patterns we focus on +// usually come from loop unrolling, so (i' - i) * S is likely the same +// across iterations and can be reused. When that happens, the optimized form +// takes only one add starting from the second iteration.  // -// Y = &X[(i' - i) * S] -// -// where (i' - i) * S is folded to the extent possible. When S2 has multiple -// bases, we pick the one that is closest to S2, or S2's "immediate" basis. +// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has +// multiple bases, we choose to rewrite S2 with respect to its "immediate" +// basis, the basis that is the closest ancestor in the dominator tree.  //  // TODO:  // -// - Handle candidates in the form of B + i * S -//  // - Floating point arithmetics when fast math is enabled.  //  // - SLSR may decrease ILP at the architecture level. Targets that are very  //   sensitive to ILP may want to disable it. Having SLSR to consider ILP is  //   left as future work. +// +// - When (i' - i) is constant but i and i' are not, we could still perform +//   SLSR.  #include <vector>  #include "llvm/ADT/DenseSet.h" @@ -72,13 +76,12 @@ namespace {  class StraightLineStrengthReduce : public FunctionPass {  public: -  // SLSR candidate. Such a candidate must be in the form of -  //   (Base + Index) * Stride -  // or -  //   Base[..][Index * Stride][..] +  // SLSR candidate. Such a candidate must be in one of the forms described in +  // the header comments.    struct Candidate : public ilist_node<Candidate> {      enum Kind {        Invalid, // reserved for the default constructor +      Add,     // B + i * S        Mul,     // (B + i) * S        GEP,     // &B[..][i * S][..]      
}; @@ -92,14 +95,14 @@ public:            Basis(nullptr) {}      Kind CandidateKind;      const SCEV *Base; -    // Note that Index and Stride of a GEP candidate may not have the same -    // integer type. In that case, during rewriting, Stride will be +    // Note that Index and Stride of a GEP candidate do not necessarily have the +    // same integer type. In that case, during rewriting, Stride will be      // sign-extended or truncated to Index's type.      ConstantInt *Index;      Value *Stride;      // The instruction this candidate corresponds to. It helps us to rewrite a      // candidate with respect to its immediate basis. Note that one instruction -    // can corresponds to multiple candidates depending on how you associate the +    // can correspond to multiple candidates depending on how you associate the      // expression. For instance,      //      // (a + 1) * (b + 2) @@ -143,31 +146,43 @@ private:    // Returns true if Basis is a basis for C, i.e., Basis dominates C and they    // share the same base and stride.    bool isBasisFor(const Candidate &Basis, const Candidate &C); +  // Returns whether the candidate can be folded into an addressing mode. +  bool isFoldable(const Candidate &C, TargetTransformInfo *TTI, +                  const DataLayout *DL); +  // Returns true if C is already in a simplest form and not worth being +  // rewritten. +  bool isSimplestForm(const Candidate &C);    // Checks whether I is in a candidate form. If so, adds all the matching forms    // to Candidates, and tries to find the immediate basis for each of them. -  void allocateCandidateAndFindBasis(Instruction *I); +  void allocateCandidatesAndFindBasis(Instruction *I); +  // Allocate candidates and find bases for Add instructions. +  void allocateCandidatesAndFindBasisForAdd(Instruction *I); +  // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a +  // candidate. +  void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS, +                                            Instruction *I);    // Allocate candidates and find bases for Mul instructions. -  void allocateCandidateAndFindBasisForMul(Instruction *I); +  void allocateCandidatesAndFindBasisForMul(Instruction *I);    // Splits LHS into Base + Index and, if succeeds, calls -  // allocateCandidateAndFindBasis. -  void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, -                                           Instruction *I); +  // allocateCandidatesAndFindBasis. +  void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS, +                                            Instruction *I);    // Allocate candidates and find bases for GetElementPtr instructions. -  void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); +  void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP);    // A helper function that scales Idx with ElementSize before invoking -  // allocateCandidateAndFindBasis. -  void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, -                                           Value *S, uint64_t ElementSize, -                                           Instruction *I); +  // allocateCandidatesAndFindBasis. +  void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, +                                            Value *S, uint64_t ElementSize, +                                            Instruction *I);    // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate    // basis. 
-  void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, -                                     ConstantInt *Idx, Value *S, -                                     Instruction *I); +  void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B, +                                      ConstantInt *Idx, Value *S, +                                      Instruction *I);    // Rewrites candidate C with respect to Basis.    void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);    // A helper function that factors ArrayIdx to a product of a stride and a -  // constant index, and invokes allocateCandidateAndFindBasis with the +  // constant index, and invokes allocateCandidatesAndFindBasis with the    // factorings.    void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,                          GetElementPtrInst *GEP); @@ -187,7 +202,7 @@ private:    // Temporarily holds all instructions that are unlinked (but not deleted) by    // rewriteCandidateWithBasis. These instructions will be actually removed    // after all rewriting finishes. -  DenseSet<Instruction *> UnlinkedInstructions; +  std::vector<Instruction *> UnlinkedInstructions;  };  }  // anonymous namespace @@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,            Basis.CandidateKind == C.CandidateKind);  } -static bool isCompletelyFoldable(GetElementPtrInst *GEP, -                                 const TargetTransformInfo *TTI, -                                 const DataLayout *DL) { +static bool isGEPFoldable(GetElementPtrInst *GEP, +                          const TargetTransformInfo *TTI, +                          const DataLayout *DL) {    GlobalVariable *BaseGV = nullptr;    int64_t BaseOffset = 0;    bool HasBaseReg = false; @@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP,                                      BaseOffset, HasBaseReg, Scale);  } -// TODO: We currently implement an algorithm whose time complexity is linear to -// the number of existing candidates. However, a better algorithm exists. We -// could depth-first search the dominator tree, and maintain a hash table that -// contains all candidates that dominate the node being traversed.  This hash -// table is indexed by the base and the stride of a candidate.  Therefore, -// finding the immediate basis of a candidate boils down to one hash-table look -// up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis( -    Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, -    Instruction *I) { -  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { -    // If &B[Idx * S] fits into an addressing mode, do not turn it into -    // non-free computation. -    if (isCompletelyFoldable(GEP, TTI, DL)) -      return; +// Returns whether (Base + Index * Stride) can be folded to an addressing mode. 
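+// For example (target dependent, purely illustrative): with Index == 4 this +// asks whether a "base + 4 * reg" addressing mode is legal; if it is, then +// B + 4 * S is likely free once folded into a memory access and is not worth +// rewriting.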
+static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, +                          TargetTransformInfo *TTI) { +  return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true, +                                    Index->getSExtValue()); +} + +bool StraightLineStrengthReduce::isFoldable(const Candidate &C, +                                            TargetTransformInfo *TTI, +                                            const DataLayout *DL) { +  if (C.CandidateKind == Candidate::Add) +    return isAddFoldable(C.Base, C.Index, C.Stride, TTI); +  if (C.CandidateKind == Candidate::GEP) +    return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL); +  return false; +} + +// Returns true if GEP has zero or one non-zero index. +static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) { +  unsigned NumNonZeroIndices = 0; +  for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { +    ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); +    if (ConstIdx == nullptr || !ConstIdx->isZero()) +      ++NumNonZeroIndices; +  } +  return NumNonZeroIndices <= 1; +} + +bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) { +  if (C.CandidateKind == Candidate::Add) { +    // B + 1 * S or B + (-1) * S +    return C.Index->isOne() || C.Index->isMinusOne(); +  } +  if (C.CandidateKind == Candidate::Mul) { +    // (B + 0) * S +    return C.Index->isZero(); +  } +  if (C.CandidateKind == Candidate::GEP) { +    // (char*)B + S or (char*)B - S +    return ((C.Index->isOne() || C.Index->isMinusOne()) && +            hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins)));    } +  return false; +} +// TODO: We currently implement an algorithm whose time complexity is linear in +// the number of existing candidates. However, we could do better by using +// ScopedHashTable. Specifically, while traversing the dominator tree, we could +// maintain all the candidates that dominate the basic block being traversed in +// a ScopedHashTable. This hash table is indexed by the base and the stride of +// a candidate. Therefore, finding the immediate basis of a candidate boils down +// to one hash-table lookup. +void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( +    Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, +    Instruction *I) {    Candidate C(CT, B, Idx, S, I); +  // SLSR can complicate an instruction in two cases: +  // +  // 1. If we can fold I into an addressing mode, computing I is likely free or +  // takes only one instruction. +  // +  // 2. I is already in a simplest form. For example, when +  //      X = B + 8 * S +  //      Y = B + S, +  //    rewriting Y to X - 7 * S is probably a bad idea. +  // +  // In the above cases, we still add I to the candidate list so that I can be +  // the basis of other candidates, but we leave I's basis blank so that I +  // won't be rewritten. +  if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) { +    // Try to compute the immediate basis of C. +    unsigned NumIterations = 0; +    // Limit the scan radius to avoid running in quadratic time.
+    static const unsigned MaxNumIterations = 50; +    for (auto Basis = Candidates.rbegin(); +         Basis != Candidates.rend() && NumIterations < MaxNumIterations; +         ++Basis, ++NumIterations) { +      if (isBasisFor(*Basis, C)) { +        C.Basis = &(*Basis); +        break; +      }      }    }    // Regardless of whether we find a basis for C, we need to push C to the -  // candidate list. +  // candidate list so that it can be the basis of other candidates.    Candidates.push_back(C);  } -void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) { +void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( +    Instruction *I) {    switch (I->getOpcode()) { +  case Instruction::Add: +    allocateCandidatesAndFindBasisForAdd(I); +    break;    case Instruction::Mul: -    allocateCandidateAndFindBasisForMul(I); +    allocateCandidatesAndFindBasisForMul(I);      break;    case Instruction::GetElementPtr: -    allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I)); +    allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));      break;    }  } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( +    Instruction *I) { +  // Try matching B + i * S. +  if (!isa<IntegerType>(I->getType())) +    return; + +  assert(I->getNumOperands() == 2 && "isn't I an add?"); +  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); +  allocateCandidatesAndFindBasisForAdd(LHS, RHS, I); +  if (LHS != RHS) +    allocateCandidatesAndFindBasisForAdd(RHS, LHS, I); +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( +    Value *LHS, Value *RHS, Instruction *I) { +  Value *S = nullptr; +  ConstantInt *Idx = nullptr; +  if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) { +    // I = LHS + RHS = LHS + Idx * S +    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); +  } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) { +    // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx) +    APInt One(Idx->getBitWidth(), 1); +    Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue()); +    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); +  } else { +    // At least, I = LHS + 1 * RHS +    ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1); +    allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS, +                                   I); +  } +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(      Value *LHS, Value *RHS, Instruction *I) {    Value *B = nullptr;    ConstantInt *Idx = nullptr; @@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(    if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {      // If LHS is in the form of "Base + Index", then I is in the form of      // "(Base + Index) * RHS". -    allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); +    allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);    } else {      // Otherwise, at least try the form (LHS + 0) * RHS.      
ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); -    allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, +    allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,                                    I);    }  } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul(      Instruction *I) {    // Try matching (B + i) * S.    // TODO: we could extend SLSR to float and vector types.    if (!isa<IntegerType>(I->getType()))      return; +  assert(I->getNumOperands() == 2 && "isn't I a mul?");    Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); -  allocateCandidateAndFindBasisForMul(LHS, RHS, I); +  allocateCandidatesAndFindBasisForMul(LHS, RHS, I);    if (LHS != RHS) {      // Symmetrically, try to split RHS to Base + Index. -    allocateCandidateAndFindBasisForMul(RHS, LHS, I); +    allocateCandidatesAndFindBasisForMul(RHS, LHS, I);    }  } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(      const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,      Instruction *I) { -  // I = B + sext(Idx *nsw S) *nsw ElementSize +  // I = B + sext(Idx *nsw S) * ElementSize +  //   = B + (sext(Idx) * sext(S)) * ElementSize    //   = B + (sext(Idx) * ElementSize) * sext(S)    // Casting to IntegerType is safe because we skipped vector GEPs.    IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));    ConstantInt *ScaledIdx = ConstantInt::get(        IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); -  allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); +  allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);  }  void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,                                                    const SCEV *Base,                                                    uint64_t ElementSize,                                                    GetElementPtrInst *GEP) { -  // At least, ArrayIdx = ArrayIdx *s 1. -  allocateCandidateAndFindBasisForGEP( +  // At least, ArrayIdx = ArrayIdx *nsw 1. +  allocateCandidatesAndFindBasisForGEP(        Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),        ArrayIdx, ElementSize, GEP);    Value *LHS = nullptr;    ConstantInt *RHS = nullptr; -  // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4). -  //    // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx    // itself. This would allow us to handle the shl case for free. However,    // matching SCEVs has two issues: @@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,    // sext'ed multiplication.    if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {      // SLSR is currently unsafe if i * S may overflow. 
-    // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize -    allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); +    // GEP = Base + sext(LHS *nsw RHS) * ElementSize +    allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); +  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) { +    // GEP = Base + sext(LHS <<nsw RHS) * ElementSize +    //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize +    APInt One(RHS->getBitWidth(), 1); +    ConstantInt *PowerOf2 = +        ConstantInt::get(RHS->getContext(), One << RHS->getValue()); +    allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);    }  } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(      GetElementPtrInst *GEP) {    // TODO: handle vector GEPs    if (GEP->getType()->isVectorTy())      return; @@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,      else        BumpWithUglyGEP = true;    } +    // Compute Bump = C - Basis = (i' - i) * S.    // Common case 1: if (i' - i) is 1, Bump = S.    if (IndexOffset.getSExtValue() == 1) @@ -443,9 +556,24 @@    // Common case 2: if (i' - i) is -1, Bump = -S.    if (IndexOffset.getSExtValue() == -1)      return Builder.CreateNeg(C.Stride); -  // Otherwise, Bump = (i' - i) * sext/trunc(S). -  ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset); -  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType()); + +  // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may +  // have different bit widths. +  IntegerType *DeltaType = +      IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth()); +  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType); +  if (IndexOffset.isPowerOf2()) { +    // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i). +    ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2()); +    return Builder.CreateShl(ExtendedStride, Exponent); +  } +  if ((-IndexOffset).isPowerOf2()) { +    // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i - i'). +    ConstantInt *Exponent = +        ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); +    return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); +  } +  Constant *Delta = ConstantInt::get(DeltaType, IndexOffset);    return Builder.CreateMul(ExtendedStride, Delta);  } @@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(      const Candidate &C, const Candidate &Basis) {    assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&           C.Stride == Basis.Stride); +  // We run rewriteCandidateWithBasis on all candidates in post-order, so the +  // basis of a candidate cannot be unlinked before the candidate. +  assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");    // An instruction can correspond to multiple candidates.
Therefore, instead of    // simply deleting an instruction when we rewrite it, we mark its parent as @@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(    Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);    Value *Reduced = nullptr; // equivalent to but weaker than C.Ins    switch (C.CandidateKind) { +  case Candidate::Add:    case Candidate::Mul: -    Reduced = Builder.CreateAdd(Basis.Ins, Bump); +    if (BinaryOperator::isNeg(Bump)) { +      Reduced = +          Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump)); +    } else { +      Reduced = Builder.CreateAdd(Basis.Ins, Bump); +    }      break;    case Candidate::GEP:      {        Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); +      bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();        if (BumpWithUglyGEP) {          // C = (char *)Basis + Bump          unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();          Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);          Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); -        // We only considered inbounds GEP as candidates. -        Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); +        if (InBounds) +          Reduced = +              Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); +        else +          Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);          Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());        } else {          // C = gep Basis, Bump          // Canonicalize bump to pointer size.          Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); -        Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); +        if (InBounds) +          Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump); +        else +          Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);        }      }      break; @@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(    // Unlink C.Ins so that we can skip other candidates also corresponding to    // C.Ins. The actual deletion is postponed to the end of runOnFunction.    C.Ins->removeFromParent(); -  UnlinkedInstructions.insert(C.Ins); +  UnlinkedInstructions.push_back(C.Ins);  }  bool StraightLineStrengthReduce::runOnFunction(Function &F) { @@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {    for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);         node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {      for (auto &I : *node->getBlock()) -      allocateCandidateAndFindBasis(&I); +      allocateCandidatesAndFindBasis(&I);    }    // Rewrite candidates in the reverse depth-first order. This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 6c3ce58..4f23e20 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() {  /// no longer dominate all their uses. 
 /// no longer dominate all their uses. Not sure if this is really necessary
 void StructurizeCFG::rebuildSSA() {
   SSAUpdater Updater;
-  for (const auto &BB : ParentRegion->blocks())
+  for (auto *BB : ParentRegion->blocks())
     for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
          II != IE; ++II) {
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 820544b..c1cd39a 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) {
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
     BasicBlock *B = I;
     TerminatorInst *Last = B->getTerminator();
-    DebugLoc LastLoc = Last->getDebugLoc();
-    if (LastLoc.isUnknown()) continue;
-    DILocation LastDIL(LastLoc.getAsMDNode(Ctx));
+    DILocation LastDIL = Last->getDebugLoc().get();
+    if (!LastDIL)
+      continue;
 
     for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
       BasicBlock *Succ = Last->getSuccessor(I);
       Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
-      DebugLoc FirstLoc = First->getDebugLoc();
-      if (FirstLoc.isUnknown()) continue;
-      DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx));
+      DILocation FirstDIL = First->getDebugLoc().get();
+      if (!FirstDIL)
+        continue;
 
       // If the first instruction (First) of Succ is at the same file
       // location as B's last instruction (Last), add a new
       // discriminator for First's location and all the instructions
       // in Succ that share the same location with First.
-      if (FirstDIL.atSameLineAs(LastDIL)) {
+      if (!FirstDIL->canDiscriminate(*LastDIL)) {
         // Create a new lexical scope and compute a new discriminator
         // number for it.
-        StringRef Filename = FirstDIL.getFilename();
-        DIScope Scope = FirstDIL.getScope();
-        DIFile File = Builder.createFile(Filename, Scope.getDirectory());
-        unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx);
+        StringRef Filename = FirstDIL->getFilename();
+        auto *Scope = FirstDIL->getScope();
+        DIFile File = Builder.createFile(Filename, Scope->getDirectory());
+
+        // FIXME: Calculate the discriminator here, based on local information,
+        // and delete MDLocation::computeNewDiscriminator().  The current
+        // solution gives different results depending on other modules in the
+        // same context.  All we really need is to discriminate between
+        // FirstDIL and LastDIL -- a local map would suffice.
+        unsigned Discriminator = FirstDIL->computeNewDiscriminator();
         DILexicalBlockFile NewScope =
             Builder.createLexicalBlockFile(Scope, File, Discriminator);
-        DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope);
-        DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL);
+        auto *NewDIL =
+            MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
+                            NewScope, FirstDIL->getInlinedAt());
+        DebugLoc newDebugLoc = NewDIL;
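
As a sketch of the local scheme the FIXME envisions (the map and helper below
are hypothetical, not existing LLVM API), discriminating FirstDIL from LastDIL
only needs a counter keyed on the pair of locations being distinguished:

    // Hypothetical per-function state: hand out a fresh discriminator for
    // each (LastDIL, FirstDIL) pair instead of consulting module-wide state.
    DenseMap<std::pair<MDLocation *, MDLocation *>, unsigned> LocalDiscriminators;

    unsigned nextLocalDiscriminator(MDLocation *Last, MDLocation *First) {
      return ++LocalDiscriminators[{Last, First}]; // never 0, the default value
    }
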
 
         // Attach this new debug location to First and every
         // instruction following First that shares the same location.
         for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
              ++I1) {
-          if (I1->getDebugLoc() != FirstLoc) break;
+          if (I1->getDebugLoc().get() != FirstDIL)
+            break;
           I1->setDebugLoc(newDebugLoc);
-          DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber()
-                       << ":" << NewDIL.getColumnNumber() << ":"
-                       << NewDIL.getDiscriminator() << *I1 << "\n");
+          DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
+                       << ":" << NewDIL->getColumn() << ":"
+                       << NewDIL->getDiscriminator() << *I1 << "\n");
         }
         DEBUG(dbgs() << "\n");
         Changed = true;
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index f04ea9c..f200b58 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -157,20 +157,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 // Find the MDNode which corresponds to the DISubprogram data that describes F.
 static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
   for (DISubprogram Subprogram : Finder.subprograms()) {
-    if (Subprogram.describes(F)) return Subprogram;
+    if (Subprogram->describes(F))
+      return Subprogram;
   }
   return nullptr;
 }
 
 // Add an operand to an existing MDNode. The new operand will be added at the
 // back of the operand list.
-static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) {
+static void AddOperand(DICompileUnit CU, MDSubprogramArray SPs, Metadata *NewSP) {
   SmallVector<Metadata *, 16> NewSPs;
-  NewSPs.reserve(SPs->getNumOperands() + 1);
-  for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I)
-    NewSPs.push_back(SPs->getOperand(I));
+  NewSPs.reserve(SPs.size() + 1);
+  for (auto *SP : SPs)
+    NewSPs.push_back(SP);
   NewSPs.push_back(NewSP);
-  CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs)));
+  CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
 }
 
 // Clone the module-level debug info associated with OldFunc. The cloned data
@@ -186,15 +187,15 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
   // Ensure that OldFunc appears in the map.
   // (if it's already there it must point to NewFunc anyway)
   VMap[OldFunc] = NewFunc;
-  DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap));
+  DISubprogram NewSubprogram =
+      cast<MDSubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
 
   for (DICompileUnit CU : Finder.compile_units()) {
-    DIArray Subprograms(CU.getSubprograms());
-
+    auto Subprograms = CU->getSubprograms();
     // If the compile unit's function list contains the old function, it should
     // also contain the new one.
-    for (unsigned i = 0; i < Subprograms.getNumElements(); i++) {
-      if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) {
+    for (auto *SP : Subprograms) {
+      if (SP == OldSubprogramMDNode) {
         AddOperand(CU, Subprograms, NewSubprogram);
         break;
       }
@@ -395,7 +396,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     if (Action == CloningDirector::CloneSuccessors) {
       // If the director says to skip with a terminate instruction, we still
       // need to clone this block's successors.
-      const TerminatorInst *TI = BB->getTerminator();
+      const TerminatorInst *TI = NewBB->getTerminator();
       for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
         ToClone.push_back(TI->getSuccessor(i));
       return;
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 52e2d59..44b7d25 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
         if (MSI->isVolatile())
           return true;
         GS.StoredType = GlobalStatus::Stored;
-      } else if (ImmutableCallSite C = I) {
+      } else if (auto C = ImmutableCallSite(I)) {
         if (!C.isCallee(&U))
           return true;
         GS.IsLoaded = true;
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index df3e1d4..a08ffbe 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -835,11 +835,10 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
                     DenseMap<const MDLocation *, MDLocation *> &IANodes) {
   SmallVector<MDLocation*, 3> InlinedAtLocations;
   MDLocation *Last = InlinedAtNode;
-  DebugLoc CurInlinedAt = DL;
+  MDLocation *CurInlinedAt = DL;
 
   // Gather all the inlined-at nodes
-  while (MDLocation *IA =
-             cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) {
+  while (MDLocation *IA = CurInlinedAt->getInlinedAt()) {
     // Skip any we've already built nodes for
     if (MDLocation *Found = IANodes[IA]) {
       Last = Found;
@@ -847,7 +846,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
     }
 
     InlinedAtLocations.push_back(IA);
-    CurInlinedAt = DebugLoc::getFromDILocation(IA);
+    CurInlinedAt = IA;
   }
 
   // Starting from the top, rebuild the nodes to point to the new inlined-at
@@ -862,7 +861,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
 
   // And finally create the normal location for this instruction, referring to
   // the new inlined-at chain.
-  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last);
+  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last);
 }
 
 /// Update inlined instructions' line numbers to
@@ -870,11 +869,11 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
 static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                              Instruction *TheCall) {
   DebugLoc TheCallDL = TheCall->getDebugLoc();
-  if (TheCallDL.isUnknown())
+  if (!TheCallDL)
     return;
 
   auto &Ctx = Fn->getContext();
-  auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx));
+  MDLocation *InlinedAtNode = TheCallDL;
 
   // Create a unique call site, not to be confused with any other call from the
   // same location.
@@ -891,7 +890,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
     for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
          BI != BE; ++BI) {
       DebugLoc DL = BI->getDebugLoc();
-      if (DL.isUnknown()) {
+      if (!DL) {
         // If the inlined instruction has no line number, make it look as if it
         // originates from the call location. This is important for
         // ((__always_inline__, __nodebug__)) functions which must use caller
@@ -905,19 +904,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
         BI->setDebugLoc(TheCallDL);
       } else {
         BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
-        if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
-          LLVMContext &Ctx = BI->getContext();
-          MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
-          DVI->setOperand(2, MetadataAsValue::get(
-                                 Ctx, createInlinedVariable(DVI->getVariable(),
-                                                            InlinedAt, Ctx)));
-        } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
-          LLVMContext &Ctx = BI->getContext();
-          MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx);
-          DDI->setOperand(1, MetadataAsValue::get(
-                                 Ctx, createInlinedVariable(DDI->getVariable(),
-                                                            InlinedAt, Ctx)));
-        }
       }
     }
   }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bd15f9e..1c9760e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
 /// that has an associated llvm.dbg.declare intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;
 
   if (LdStHasDebugValue(DIVar, SI))
     return true;
 
-  Instruction *DbgVal = nullptr;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
   Argument *ExtendedArg = nullptr;
@@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
   if (ExtendedArg)
-    DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   else
-    DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
-                                             DIExpr, SI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   return true;
 }
 
@@ -1029,19 +1026,16 @@
 /// that has an associated llvm.dbg.declare intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            LoadInst *LI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;
 
   if (LdStHasDebugValue(DIVar, LI))
     return true;
 
-  Instruction *DbgVal =
-      Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+  Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+                                  DDI->getDebugLoc(), LI);
   return true;
 }
 
@@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
           // This is a call by-value or some other instruction that
           // takes a pointer to the variable. Insert a *value*
           // intrinsic that describes the alloca.
-          auto DbgVal = DIB.insertDbgValueIntrinsic(
-              AI, 0, DIVariable(DDI->getVariable()),
-              DIExpression(DDI->getExpression()), CI);
-          DbgVal->setDebugLoc(DDI->getDebugLoc());
+          DIB.insertDbgValueIntrinsic(AI, 0, DIVariable(DDI->getVariable()),
+                                      DIExpression(DDI->getExpression()),
+                                      DDI->getDebugLoc(), CI);
         }
       DDI->eraseFromParent();
     }
@@ -1112,10 +1105,8 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
   if (!DDI)
     return false;
   DebugLoc Loc = DDI->getDebugLoc();
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;
 
@@ -1127,16 +1118,14 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
     SmallVector<uint64_t, 4> NewDIExpr;
     NewDIExpr.push_back(dwarf::DW_OP_deref);
     if (DIExpr)
-      for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
-        NewDIExpr.push_back(DIExpr.getElement(i));
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
     DIExpr = Builder.createExpression(NewDIExpr);
   }
 
   // Insert llvm.dbg.declare in the same basic block as the original alloca,
   // and remove old llvm.dbg.declare.
   BasicBlock *BB = AI->getParent();
-  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB)
-    ->setDebugLoc(Loc);
+  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
   DDI->eraseFromParent();
   return true;
 }
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6b3aa02..1dbce47 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
 /// execute without exiting the loop.
 ///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count.  Unrolling these
+/// loops requires an unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop.  UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
 /// The LoopInfo Analysis that is passed will be kept consistent.
 ///
 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
 /// available from the Pass it must also preserve those analyses.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
-                      bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
-                      Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+                      bool AllowRuntime, bool AllowExpensiveTripCount,
+                      unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+                      LPPassManager *LPM, AssumptionCache *AC) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   // flag is specified.
   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
 
-  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+  if (RuntimeTripCount &&
+      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
     return false;
 
   // Notify ScalarEvolution that the loop will be substantially changed,
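
The prologue described above peels off the remainder iterations so the
unrolled body always executes a whole multiple of Count times; a source-level
sketch of the shape this produces, assuming Count == 4:

    // Runtime unrolling by 4: a prologue runs tripCount % 4 iterations,
    // then the 4x-unrolled loop covers the rest.
    void runtimeUnrolled(int *a, unsigned tripCount) {
      unsigned i = 0;
      for (unsigned rem = tripCount % 4; i < rem; ++i) // unroll prologue
        a[i] += 1;
      for (; i < tripCount; i += 4) {                  // unrolled loop body
        a[i] += 1;
        a[i + 1] += 1;
        a[i + 2] += 1;
        a[i + 3] += 1;
      }
    }
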
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 381d8fc..d1774df 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
 /// ...
 /// End:
 ///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                    LPPassManager *LPM) {
   // for now, only unroll loops that contain a single exit
   if (!L->getExitingBlock())
     return false;
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   if (isa<SCEVCouldNotCompute>(TripCountSC))
     return false;
 
+  BasicBlock *Header = L->getHeader();
+  const DataLayout &DL = Header->getModule()->getDataLayout();
+  SCEVExpander Expander(*SE, DL, "loop-unroll");
+  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+    return false;
+
   // We only handle cases when the unroll factor is a power of 2.
   // Count is the loop unroll factor, the number of extra copies added + 1.
   if (!isPowerOf2_32(Count))
     return false;
 
   // This constraint lets us deal with an overflowing trip count easily; see the
-  // comment on ModVal below.  This check is equivalent to `Log2(Count) <
-  // BEWidth`.
-  if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+  // comment on ModVal below.
+  if (Log2_32(Count) > BEWidth)
     return false;
 
   // If this loop is nested, then the loop unroller changes the code in
@@ -333,18 +339,15 @@
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
   BasicBlock *PH = L->getLoopPreheader();
-  BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   // It helps to split the original preheader twice, once for the end of the
   // prolog code and once for a new loop preheader
   BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
   BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
   BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
-  const DataLayout &DL = Header->getModule()->getDataLayout();
 
   // Compute the number of extra iterations required, which is:
   //  extra iterations = run-time trip count % (loop unroll factor + 1)
-  SCEVExpander Expander(*SE, DL, "loop-unroll");
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
   Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..014574d 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -93,3 +94,13 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
   }
   return GV;
 }
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+  if (isa<Function>(FuncOrBitcast))
+    return cast<Function>(FuncOrBitcast);
+  FuncOrBitcast->dump();
+  std::string Err;
+  raw_string_ostream Stream(Err);
+  Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+  report_fatal_error(Stream.str());
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 4b34b19..54e1733 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -872,8 +872,10 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
   }
 
   SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
-  SmallPtrSet<DomTreeNode *, 32> Visited;
   SmallVector<DomTreeNode *, 32> Worklist;
+  SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+  SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
   while (!PQ.empty()) {
     DomTreeNodePair RootPair = PQ.top();
     PQ.pop();
@@ -887,6 +889,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
     Worklist.clear();
     Worklist.push_back(Root);
+    VisitedWorklist.insert(Root);
 
     while (!Worklist.empty()) {
       DomTreeNode *Node = Worklist.pop_back_val();
@@ -905,7 +908,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
         if (SuccLevel > RootLevel)
           continue;
 
-        if (!Visited.insert(SuccNode).second)
+        if (!VisitedPQ.insert(SuccNode).second)
           continue;
 
         BasicBlock *SuccBB = SuccNode->getBlock();
@@ -919,7 +922,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
 
       for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
            ++CI) {
-        if (!Visited.count(*CI))
+        if (VisitedWorklist.insert(*CI).second)
           Worklist.push_back(*CI);
       }
     }
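
The split into VisitedPQ and VisitedWorklist untangles two traversals that
previously shared one set. The subtree walk itself is the standard worklist
idiom of marking a node when it is pushed rather than when it is popped; a
generic sketch (walk and succs are illustrative names, not LLVM API):

    #include <unordered_set>
    #include <vector>

    // Mark nodes on push so none is ever queued twice, even when it is
    // reachable along several edges.
    template <typename Node, typename SuccFn>
    void walk(Node *root, SuccFn succs) {
      std::vector<Node *> worklist{root};
      std::unordered_set<Node *> visited{root}; // root marked on push
      while (!worklist.empty()) {
        Node *n = worklist.back();
        worklist.pop_back();
        for (Node *s : succs(n))
          if (visited.insert(s).second) // true only on first insertion
            worklist.push_back(s);
      }
    }
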
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c7c0ca6..7c239cb 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1502,7 +1502,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
     if (isa<DbgInfoIntrinsic>(I))
       continue;
 
-    // Only speculatively execution a single instruction (not counting the
+    // Only speculatively execute a single instruction (not counting the
     // terminator) for now.
     ++SpeculationCost;
     if (SpeculationCost > 1)
@@ -3884,8 +3884,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
                                    "switch.tableidx.zext");
 
       Value *GEPIndices[] = { Builder.getInt32(0), Index };
-      Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
-                                             "switch.gep");
+      Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+                                             GEPIndices, "switch.gep");
       return Builder.CreateLoad(GEP, "switch.load");
     }
   }
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5867d65..42102e7 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -222,7 +222,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
   // Now that we have the destination's length, we must index into the
   // destination's pointer to get the actual memcpy destination (end of
   // the string we're concatenating).
-  Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+  Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");
 
   // We have enough information to now generate the memcpy call to do the
   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -303,7 +303,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
-      return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+      return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
     return nullptr;
   }
 
@@ -316,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());
 
   // strchr(s+n,c)  -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
 }
 
 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -351,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());
 
   // strrchr(s+n,c) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
 }
 
 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -476,7 +476,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }
 
   // See if we can get the length of the input string.
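
The end-pointer arithmetic these folds rely on is easiest to see in source
form; a sketch of the effect (plain C library calls, not the LLVM builders):

    #include <cstring>

    // strcat(dst, src) lowered the way emitStrLenMemCpy describes: index to
    // the end of dst, then memcpy src plus its nul terminator there.
    char *strcatViaMemcpy(char *dst, const char *src) {
      size_t dstLen = strlen(dst);          // "endptr" = dst + dstLen
      memcpy(dst + dstLen, src, strlen(src) + 1);
      return dst;
    }
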
@@ -487,7 +487,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Type *PT = FT->getParamType(0);
   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
   Value *DstEnd =
-      B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+      B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
 
   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -597,7 +597,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
     if (I == StringRef::npos) // No match.
       return Constant::getNullValue(CI->getType());
 
-    return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+    return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
   }
 
   // strpbrk(s, "a") -> strchr(s, 'a')
@@ -828,7 +828,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());
 
   // memchr(s+n,c,l) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "memchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
 }
 
 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
@@ -1671,7 +1671,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
     Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
     B.CreateStore(V, Ptr);
-    Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+    Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
     B.CreateStore(B.getInt8(0), Ptr);
 
     return ConstantInt::get(CI->getType(), 1);
@@ -2276,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
   if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }
 
   // If a) we don't have any length information, or b) we know this will
@@ -2284,25 +2284,25 @@
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
-    Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
-    return Ret;
-  } else if (!OnlyLowerUnknownSize) {
-    // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0)
-      return nullptr;
+  if (isFortifiedCallFoldable(CI, 2, 1, true))
+    return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
 
-    Type *SizeTTy = DL.getIntPtrType(CI->getContext());
-    Value *LenV = ConstantInt::get(SizeTTy, Len);
-    Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
-    // If the function was an __stpcpy_chk, and we were able to fold it into
-    // a __memcpy_chk, we still need to return the correct end pointer.
-    if (Ret && Func == LibFunc::stpcpy_chk)
-      return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
-    return Ret;
-  }
-  return nullptr;
+  if (OnlyLowerUnknownSize)
+    return nullptr;
+
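
In source terms, the fold below turns a known-length __stpcpy_chk into a
memcpy plus pointer arithmetic; a sketch of the effect, where len counts the
string including its nul (matching GetStringLength):

    #include <cstring>

    // stpcpy with a known size: copy len bytes (string plus nul) and return
    // the end pointer dst + len - 1, i.e. the address of the written nul.
    char *stpcpyKnownLen(char *dst, const char *src, size_t len) {
      memcpy(dst, src, len);
      return dst + len - 1;
    }
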
+  // Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
+  uint64_t Len = GetStringLength(Src);
+  if (Len == 0)
+    return nullptr;
+
+  Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+  Value *LenV = ConstantInt::get(SizeTTy, Len);
+  Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+  // If the function was an __stpcpy_chk, and we were able to fold it into
+  // a __memcpy_chk, we still need to return the correct end pointer.
+  if (Ret && Func == LibFunc::stpcpy_chk)
+    return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+  return Ret;
 }
 
 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2322,8 +2322,18 @@
 }
 
 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
-  if (CI->isNoBuiltin())
-    return nullptr;
+  // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+  // Some clang users checked for _chk libcall availability using:
+  //   __has_builtin(__builtin___memcpy_chk)
+  // When compiling with -fno-builtin, this is always true.
+  // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+  // end up with fortified libcalls, which isn't acceptable in a freestanding
+  // environment which only provides their non-fortified counterparts.
+  //
+  // Until we change clang and/or teach external users to check for availability
+  // differently, disregard the "nobuiltin" attribute and TLI::has.
+  //
+  // PR23093.
 
   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
@@ -2332,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
 
   // First, check that this is a known library function.
-  if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+  if (!TLI->getLibFunc(FuncName, Func))
     return nullptr;
 
   // We never change the calling convention.
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7d0ae4..8986932 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -93,6 +93,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <map>
 #include <tuple>
@@ -503,8 +504,7 @@ static std::string getDebugLocString(const Loop *L) {
   std::string Result;
   if (L) {
     raw_string_ostream OS(Result);
-    const DebugLoc LoopDbgLoc = L->getStartLoc();
-    if (!LoopDbgLoc.isUnknown())
+    if (const DebugLoc LoopDbgLoc = L->getStartLoc())
       LoopDbgLoc.print(OS);
     else
       // Just print the module name.
@@ -686,7 +686,7 @@ public:
          Index = B.CreateNeg(Index);
        else if (!StepValue->isOne())
          Index = B.CreateMul(Index, StepValue);
-        return B.CreateGEP(StartValue, Index);
+        return B.CreateGEP(nullptr, StartValue, Index);
 
       case IK_NoInduction:
         return nullptr;
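
The reverse-access pointer adjustments in the next hunk follow from simple
index algebra; a sketch with plain pointers (names are illustrative):

    // For unroll part P and vector width VF over a reversed consecutive
    // access, the wide memory op must start at the lowest address it
    // touches: base - P*VF + (1 - VF), covering the VF elements ending
    // at base - P*VF, which are then reversed in-register.
    int *reversePartPtr(int *base, int part, int vf) {
      return base - part * vf + (1 - vf);
    }
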
@@ -1839,7 +1839,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
     for (unsigned Part = 0; Part < UF; ++Part) {
       // Calculate the pointer for the specific unroll-part.
-      Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+      Value *PartPtr =
+          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
 
       if (Reverse) {
         // If we store to reverse consecutive memory locations then we need
@@ -1847,8 +1848,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         StoredVal[Part] = reverseVector(StoredVal[Part]);
         // If the address is consecutive but reversed, then the
         // wide store needs to start at the last vector element.
-        PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-        PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+        PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+        PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
         Mask[Part] = reverseVector(Mask[Part]);
       }
@@ -1871,13 +1872,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   setDebugLocFromInst(Builder, LI);
 
   for (unsigned Part = 0; Part < UF; ++Part) {
     // Calculate the pointer for the specific unroll-part.
-    Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+    Value *PartPtr =
+        Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));
 
     if (Reverse) {
       // If the address is consecutive but reversed, then the
       // wide load needs to start at the last vector element.
-      PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-      PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+      PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+      PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
       Mask[Part] = reverseVector(Mask[Part]);
     }
@@ -4007,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory())
     return false;
 
+  if (LAI->hasStoreToLoopInvariantAddress()) {
+    emitAnalysis(
+        VectorizationReport()
+        << "write to a loop invariant address could not be vectorized");
+    DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+    return false;
+  }
+
   if (LAI->getNumRuntimePointerChecks() >
       VectorizerParams::RuntimeMemoryCheckThreshold) {
     emitAnalysis(VectorizationReport()
@@ -4307,32 +4317,31 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
   }
 }
 
-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
-                                               ConstantInt *&StepValue) {
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+                          ConstantInt *&StepValue) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer induction variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
-    return IK_NoInduction;
+    return false;
 
   // Check that the PHI is consecutive.
   const SCEV *PhiScev = SE->getSCEV(Phi);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
   if (!AR) {
     DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
-    return IK_NoInduction;
+    return false;
   }
 
   const SCEV *Step = AR->getStepRecurrence(*SE);
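
The divisibility check that follows reduces a pointer induction to an integer
one: a step of CVSize bytes over elements of Size bytes is an induction with
integer step CVSize / Size, and only an exact division qualifies. A scalar
sketch of that check:

    #include <cstdint>

    // A pointer advancing byteStep bytes per iteration over elements of
    // elemSize bytes steps byteStep / elemSize elements, but only an exact
    // division yields a usable element stride.
    bool pointerInductionStep(int64_t byteStep, int64_t elemSize,
                              int64_t &step) {
      if (byteStep % elemSize != 0)
        return false; // stride is not a whole number of elements
      step = byteStep / elemSize;
      return true;
    }
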
   // Calculate the pointer stride and check if it is consecutive.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
   if (!C)
-    return IK_NoInduction;
+    return false;
 
   ConstantInt *CV = C->getValue();
   if (PhiTy->isIntegerTy()) {
     StepValue = CV;
-    return IK_IntInduction;
+    return true;
   }
 
   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
@@ -4340,14 +4349,28 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
   // The pointer stride cannot be determined if the pointer element type is not
   // sized.
   if (!PointerElementType->isSized())
-    return IK_NoInduction;
+    return false;
 
   const DataLayout &DL = Phi->getModule()->getDataLayout();
   int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
-    return IK_NoInduction;
+    return false;
   StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+  return true;
+}
+
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+                                               ConstantInt *&StepValue) {
+  if (!isInductionPHI(Phi, SE, StepValue))
+    return IK_NoInduction;
+
+  Type *PhiTy = Phi->getType();
+  // Found an integer induction variable.
+  if (PhiTy->isIntegerTy())
+    return IK_IntInduction;
+  // Found a pointer induction variable.
   return IK_PtrInduction;
 }
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fc4cc1..7267f58 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1183,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
     case Instruction::ICmp:
     case Instruction::FCmp: {
       // Check that all of the compares have the same predicate.
-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
       for (unsigned i = 1, e = VL.size(); i < e; ++i) {
         CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -2202,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       if (Value *V = alreadyVectorized(E->Scalars))
         return V;
 
-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Value *V;
       if (Opcode == Instruction::FCmp)
         V = Builder.CreateFCmp(P0, L, R);
@@ -3101,9 +3101,7 @@ struct SLPVectorizer : public FunctionPass {
     // delete instructions.
 
     // Scan the blocks in the function in post order.
-    for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
-         e = po_end(&F.getEntryBlock()); it != e; ++it) {
-      BasicBlock *BB = *it;
+    for (auto BB : post_order(&F.getEntryBlock())) {
      // Vectorize trees that end at stores.
      if (unsigned count = collectStores(BB, R)) {
        (void)count;
