diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 286 |
1 files changed, 118 insertions, 168 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index b0b6188..15103d7 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1382,11 +1382,7 @@ class SROA : public FunctionPass { /// \brief A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more /// efficient. - SmallVector<Instruction *, 8> DeadInsts; - - /// \brief A set to prevent repeatedly marking an instruction split into many - /// uses as dead. Only used to guard insertion into DeadInsts. - SmallPtrSet<Instruction *, 4> DeadSplitInsts; + SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts; /// \brief Post-promotion worklist. /// @@ -1573,7 +1569,7 @@ private: do { LoadInst *LI = Loads.pop_back_val(); LI->replaceAllUsesWith(NewPN); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } while (!Loads.empty()); // Inject loads into all of the pred blocks. @@ -1717,7 +1713,7 @@ private: DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); - Pass.DeadInsts.push_back(LI); + Pass.DeadInsts.insert(LI); } } }; @@ -2134,8 +2130,13 @@ static bool isVectorPromotionViable(const DataLayout &TD, } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (!isa<LoadInst>(I->U->getUser()) && - !isa<StoreInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LI->isVolatile()) + return false; + } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + if (SI->isVolatile()) + return false; + } else { return false; } } @@ -2241,18 +2242,23 @@ static bool isIntegerWideningViable(const DataLayout &TD, static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { + DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast<IntegerType>(V->getType()); assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element extends past full value"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!"); - if (Ty != IntTy) + if (Ty != IntTy) { V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); + DEBUG(dbgs() << " trunced: " << *V << "\n"); + } return V; } @@ -2262,20 +2268,27 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, IntegerType *Ty = cast<IntegerType>(V->getType()); assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - if (Ty != IntTy) + DEBUG(dbgs() << " start: " << *V << "\n"); + if (Ty != IntTy) { V = IRB.CreateZExt(V, IntTy, Name + ".ext"); + DEBUG(dbgs() << " extended: " << *V << "\n"); + } assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); uint64_t ShAmt = 8*Offset; if (DL.isBigEndian()) ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); - if (ShAmt) + if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); + DEBUG(dbgs() << " shifted: " << *V << "\n"); + } if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); + DEBUG(dbgs() << " masked: " << *Old << "\n"); V = IRB.CreateOr(Old, V, Name + ".insert"); + DEBUG(dbgs() << " inserted: " << *V << "\n"); } return V; } @@ -2442,30 +2455,21 @@ private: void deleteIfTriviallyDead(Value *V) { Instruction *I = cast<Instruction>(V); if (isInstructionTriviallyDead(I)) - Pass.DeadInsts.push_back(I); + Pass.DeadInsts.insert(I); } - bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *Result; + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - getIndex(IRB, BeginOffset), getName(".extract")); - } else { - Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset), + getName(".extract")); } - if (Result->getType() != LI.getType()) - Result = convertValue(TD, IRB, Result, LI.getType()); - LI.replaceAllUsesWith(Result); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *Result << "\n"); - return true; + return V; } - bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), @@ -2473,12 +2477,10 @@ private: V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); - LI.replaceAllUsesWith(V); - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return true; + if (Offset > 0 || EndOffset < NewAllocaEndOffset) + V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, + getName(".extract")); + return V; } bool visitLoadInst(LoadInst &LI) { @@ -2488,7 +2490,29 @@ private: IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(LI.getType())) { + bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); + Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); + bool IsPtrAdjusted = false; + Value *V; + if (VecTy) { + V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + } else if (IntTy && LI.getType()->isIntegerTy()) { + V = rewriteIntegerLoad(IRB, LI); + } else if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, NewAllocaTy, LI.getType())) { + V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), getName(".load")); + } else { + Type *LTy = TargetTy->getPointerTo(); + V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), + getPartitionTypeAlign(TargetTy), + LI.isVolatile(), getName(".load")); + IsPtrAdjusted = true; + } + V = convertValue(TD, IRB, V, TargetTy); + + if (IsSplitIntLoad) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); @@ -2498,21 +2522,8 @@ private: assert(LI.getType()->getIntegerBitWidth() == TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide loads can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8); - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NewAllocaTy, NarrowTy); - Value *V; // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); - if (IsConvertable) - V = convertValue(TD, IRB, - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")), - NarrowTy); - else - V = IRB.CreateAlignedLoad( - getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy), getName(".load")); // Create a placeholder value with the same type as LI to use as the // basis for the new value. This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -2524,104 +2535,40 @@ private: LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; - if (Pass.DeadSplitInsts.insert(&LI)) - Pass.DeadInsts.push_back(&LI); - DEBUG(dbgs() << " to: " << *V << "\n"); - return IsConvertable; - } - - if (VecTy) - return rewriteVectorizedLoadInst(IRB, LI, OldOp); - if (IntTy && LI.getType()->isIntegerTy()) - return rewriteIntegerLoad(IRB, LI); - - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, NewAllocaTy, LI.getType())) { - Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); - Value *NewV = convertValue(TD, IRB, NewLI, LI.getType()); - LI.replaceAllUsesWith(NewV); - Pass.DeadInsts.push_back(&LI); - - DEBUG(dbgs() << " to: " << *NewLI << "\n"); - return !LI.isVolatile(); + } else { + LI.replaceAllUsesWith(V); } - assert(!IntTy && "Invalid load found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - LI.getPointerOperand()->getType()); - LI.setOperand(0, NewPtr); - LI.setAlignment(getPartitionTypeAlign(LI.getType())); - DEBUG(dbgs() << " to: " << LI << "\n"); - + Pass.DeadInsts.insert(&LI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !LI.isVolatile(); - } - - bool rewriteWideStoreInst(IRBuilder<> &IRB, StoreInst &SI, Type *ValueTy, - unsigned Size) { - assert(!SI.isVolatile()); - assert(ValueTy->isIntegerTy() && - "Only integer type loads and stores are split"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeStoreSizeInBits(ValueTy) && - "Non-byte-multiple bit width"); - assert(ValueTy->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); - IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); - Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy, - BeginOffset, getName(".extract")); - StoreInst *NewSI; - bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) && - canConvertValue(TD, NarrowTy, NewAllocaTy); - if (IsConvertable) - NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy), - &NewAI, NewAI.getAlignment()); - else - NewSI = IRB.CreateAlignedStore( - V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()), - getPartitionTypeAlign(NarrowTy)); - (void)NewSI; - if (Pass.DeadSplitInsts.insert(&SI)) - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return IsConvertable; + DEBUG(dbgs() << " to: " << *V << "\n"); + return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI, - Value *OldOp) { - Value *V = SI.getValueOperand(); - Type *ValueTy = V->getType(); - if (ValueTy == ElementTy || + bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + StoreInst &SI, Value *OldOp) { + if (V->getType() == ElementTy || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - if (ValueTy != ElementTy) + if (V->getType() != ElementTy) V = convertValue(TD, IRB, V, ElementTy); LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); - } else if (ValueTy != VecTy) { - uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) - return rewriteWideStoreInst(IRB, SI, ValueTy, Size); - + } else if (V->getType() != VecTy) { V = convertValue(TD, IRB, V, VecTy); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { + bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); - Value *V = SI.getValueOperand(); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); @@ -2633,7 +2580,7 @@ private: } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); - Pass.DeadInsts.push_back(&SI); + Pass.DeadInsts.insert(&SI); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2645,46 +2592,53 @@ private: assert(OldOp == OldPtr); IRBuilder<> IRB(&SI); - if (VecTy) - return rewriteVectorizedStoreInst(IRB, SI, OldOp); - Type *ValueTy = SI.getValueOperand()->getType(); - - uint64_t Size = EndOffset - BeginOffset; - if (Size < TD.getTypeStoreSize(ValueTy)) - return rewriteWideStoreInst(IRB, SI, ValueTy, Size); - - if (IntTy && ValueTy->isIntegerTy()) - return rewriteIntegerStore(IRB, SI); + Value *V = SI.getValueOperand(); // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after promoting this alloca. - if (ValueTy->isPointerTy()) - if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand() - ->stripInBoundsOffsets())) + if (V->getType()->isPointerTy()) + if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) Pass.PostPromotionWorklist.insert(AI); - if (BeginOffset == NewAllocaBeginOffset && - canConvertValue(TD, ValueTy, NewAllocaTy)) { - Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy); - StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(), - SI.isVolatile()); - (void)NewSI; - Pass.DeadInsts.push_back(&SI); - - DEBUG(dbgs() << " to: " << *NewSI << "\n"); - return !SI.isVolatile(); + uint64_t Size = EndOffset - BeginOffset; + if (Size < TD.getTypeStoreSize(V->getType())) { + assert(!SI.isVolatile()); + assert(V->getType()->isIntegerTy() && + "Only integer type loads and stores are split"); + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeStoreSizeInBits(V->getType()) && + "Non-byte-multiple bit width"); + assert(V->getType()->getIntegerBitWidth() == + TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + "Only alloca-wide stores can be split and recomposed"); + IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); + V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, + getName(".extract")); } - assert(!IntTy && "Invalid store found with int-op widening enabled"); - - Value *NewPtr = getAdjustedAllocaPtr(IRB, - SI.getPointerOperand()->getType()); - SI.setOperand(1, NewPtr); - SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType())); - DEBUG(dbgs() << " to: " << SI << "\n"); + if (VecTy) + return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + if (IntTy && V->getType()->isIntegerTy()) + return rewriteIntegerStore(IRB, V, SI); + StoreInst *NewSI; + if (BeginOffset == NewAllocaBeginOffset && + canConvertValue(TD, V->getType(), NewAllocaTy)) { + V = convertValue(TD, IRB, V, NewAllocaTy); + NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + SI.isVolatile()); + } else { + Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo()); + NewSI = IRB.CreateAlignedStore(V, NewPtr, + getPartitionTypeAlign(V->getType()), + SI.isVolatile()); + } + (void)NewSI; + Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); - return NewPtr == &NewAI && !SI.isVolatile(); + + DEBUG(dbgs() << " to: " << *NewSI << "\n"); + return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } bool visitMemSetInst(MemSetInst &II) { @@ -2704,8 +2658,7 @@ private: } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); Type *AllocaTy = NewAI.getAllocatedType(); Type *ScalarTy = AllocaTy->getScalarType(); @@ -2861,8 +2814,7 @@ private: return false; } // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && EndOffset == NewAllocaEndOffset; @@ -2972,8 +2924,7 @@ private: assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. - if (Pass.DeadSplitInsts.insert(&II)) - Pass.DeadInsts.push_back(&II); + Pass.DeadInsts.insert(&II); ConstantInt *Size = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), @@ -3542,7 +3493,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { DI != DE; ++DI) { Changed = true; (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); - DeadInsts.push_back(*DI); + DeadInsts.insert(*DI); } for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(), DE = P.dead_op_end(); @@ -3553,7 +3504,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { if (Instruction *OldI = dyn_cast<Instruction>(OldV)) if (isInstructionTriviallyDead(OldI)) { Changed = true; - DeadInsts.push_back(OldI); + DeadInsts.insert(OldI); } } @@ -3574,7 +3525,6 @@ bool SROA::runOnAlloca(AllocaInst &AI) { /// We also record the alloca instructions deleted here so that they aren't /// subsequently handed to mem2reg to promote. void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { - DeadSplitInsts.clear(); while (!DeadInsts.empty()) { Instruction *I = DeadInsts.pop_back_val(); DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); @@ -3586,7 +3536,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { // Zero out the operand and see if it becomes trivially dead. *OI = 0; if (isInstructionTriviallyDead(U)) - DeadInsts.push_back(U); + DeadInsts.insert(U); } if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) |