-rw-r--r--	lib/Transforms/Scalar/LoopStrengthReduce.cpp                                  | 182
-rw-r--r--	test/CodeGen/X86/lsr-loop-exit-cond.ll                                        | 134
-rw-r--r--	test/CodeGen/X86/lsr-negative-stride.ll                                       |   2
-rw-r--r--	test/CodeGen/X86/remat-mov-1.ll (renamed from test/CodeGen/X86/remat-mov0.ll) |   2
4 files changed, 275 insertions, 45 deletions
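
This change lets LSR rewrite a loop's terminating icmp to test the post-incremented induction variable even when the exiting block is not the latch, and, when that compare is the IV's only in-loop use, the IV increment is now placed immediately before the compare (see FindIVIncInsertPt and the new IVIncInsertPt parameter in the diff below). The reduced function below is only an illustrative sketch, not part of the patch; the names @count_down and @use are invented. It shows the shape of loop the new lsr-loop-exit-cond.ll test guards: with the exit test on the updated counter, the update and the compare stay adjacent, which is what the test checks for on x86-64 (a decq immediately followed by jne).

define void @count_down(i64 %n) nounwind {
entry:
	br label %loop

loop:		; preds = %loop, %entry
	%iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ]		; <i64> [#uses=2]
	tail call void @use(i64 %iv) nounwind
	; The terminating condition tests the post-incremented (here decremented)
	; value, so the update and the icmp sit next to each other and only one
	; live range of the IV is needed across the backedge.
	%iv.next = add i64 %iv, -1		; <i64> [#uses=2]
	%exitcond = icmp eq i64 %iv.next, 0		; <i1> [#uses=1]
	br i1 %exitcond, label %return, label %loop

return:		; preds = %loop
	ret void
}

declare void @use(i64)
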
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 9568449..127ef56 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -43,6 +43,7 @@ STATISTIC(NumVariable,  "Number of PHIs with variable strides");
 STATISTIC(NumEliminated, "Number of strides eliminated");
 STATISTIC(NumShadow,     "Number of Shadow IVs optimized");
 STATISTIC(NumImmSunk,    "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond,   "Number of loop terminating conds optimized");
 
 static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
                                        cl::init(false),
@@ -122,6 +123,10 @@ namespace {
     /// particular stride.
     std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
 
+    /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
+    /// reused (nor should they be rewritten to reuse other strides).
+    SmallSet<SCEVHandle, 4> StrideNoReuse;
+
     /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
     /// We use this to iterate over the IVUsesByStride collection without being
     /// dependent on random ordering of pointers in the process.
@@ -184,8 +189,8 @@ namespace {
     SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                                IVExpr&, const Type*,
                                const std::vector<BasedUser>& UsersToProcess);
-    bool ValidStride(bool, int64_t,
-                     const std::vector<BasedUser>& UsersToProcess);
+    bool ValidScale(bool, int64_t,
+                    const std::vector<BasedUser>& UsersToProcess);
     SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
                               IVUsersOfOneStride &Uses,
                               Loop *L,
@@ -213,6 +218,7 @@ namespace {
                                       SCEVHandle Stride,
                                       SCEVHandle CommonExprs,
                                       Value *CommonBaseV,
+                                      Instruction *IVIncInsertPt,
                                       const Loop *L,
                                       SCEVExpander &PreheaderRewriter);
     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
@@ -799,7 +805,7 @@ static bool fitsInAddressMode(const SCEVHandle &V, const Type *UseTy,
 /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
 /// loop varying to the Imm operand.
 static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
-                                            Loop *L, ScalarEvolution *SE) {
+                                             Loop *L, ScalarEvolution *SE) {
   if (Val->isLoopInvariant(L)) return;  // Nothing to do.
 
   if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
@@ -1122,16 +1128,15 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
   return Result;
 }
 
-/// ValidStride - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
 /// stores in UsersToProcess.
 ///
-bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
-                               int64_t Scale,
+bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
                                const std::vector<BasedUser>& UsersToProcess) {
   if (!TLI)
     return true;
 
-  for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
     // If this is a load or other access, pass the type of the access in.
     const Type *AccessTy = Type::VoidTy;
     if (isAddressUse(UsersToProcess[i].Inst,
@@ -1186,13 +1191,17 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 const SCEVHandle &Stride,
                                 IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
+  if (StrideNoReuse.count(Stride))
+    return SE->getIntegerSCEV(0, Stride->getType());
+
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
          ++NewStride) {
       std::map<SCEVHandle, IVsOfOneStride>::iterator SI =
         IVsByStride.find(StrideOrder[NewStride]);
-      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
+          StrideNoReuse.count(SI->first))
         continue;
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
       if (SI->first != Stride &&
@@ -1206,7 +1215,7 @@ SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
       // multiplications.
       if (Scale == 1 ||
           (AllUsesAreAddresses &&
-           ValidStride(HasBaseReg, Scale, UsersToProcess)))
+           ValidScale(HasBaseReg, Scale, UsersToProcess)))
         for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
                IE = SI->second.IVs.end(); II != IE; ++II)
           // FIXME: Only handle base == 0 for now.
@@ -1302,7 +1311,7 @@ SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
       // field of the use, so that we don't try to use something before it is
       // computed.
       MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
-                                      UsersToProcess.back().Imm, L, SE);
+                                       UsersToProcess.back().Imm, L, SE);
       assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
              "Base value is not loop invariant!");
     }
@@ -1452,6 +1461,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(
 /// Return the created phi node.
 ///
 static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
+                                Instruction *IVIncInsertPt,
                                 const Loop *L, SCEVExpander &Rewriter) {
   assert(Start->isLoopInvariant(L) &&
          "New PHI start is not loop invariant!");
@@ -1475,16 +1485,17 @@ static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
     IncAmount = Rewriter.SE.getNegativeSCEV(Step);
 
   // Insert an add instruction right before the terminator corresponding
-  // to the back-edge.
+  // to the back-edge or just before the only use. The location is determined
+  // by the caller and passed in as IVIncInsertPt.
   Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
                                         Preheader->getTerminator());
   Instruction *IncV;
   if (isNegative) {
     IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   } else {
     IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   }
   if (!isa<ConstantInt>(StepV))
     ++NumVariable;
@@ -1541,6 +1552,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
 
   // Rewrite the UsersToProcess records, creating a separate PHI for each
   // unique Base value.
+  Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
     // TODO: The uses are grouped by base, but not sorted.  We arbitrarily
     // pick the first Imm value here to start with, and adjust it for the
@@ -1548,7 +1560,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
     SCEVHandle Imm = UsersToProcess[i].Imm;
     SCEVHandle Base = UsersToProcess[i].Base;
     SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm);
-    PHINode *Phi = InsertAffinePhi(Start, Stride, L,
+    PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
                                    PreheaderRewriter);
     // Loop over all the users with the same base.
     do {
@@ -1561,6 +1573,18 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(
   }
 }
 
+/// FindIVIncInsertPt - Return the location to insert the increment instruction.
+/// If the only use is a use of the postinc value (it must be the loop
+/// termination condition), then insert it just before the use.
+static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
+                                      const Loop *L) {
+  if (UsersToProcess.size() == 1 &&
+      UsersToProcess[0].isUseOfPostIncrementedValue &&
+      L->contains(UsersToProcess[0].Inst->getParent()))
+    return UsersToProcess[0].Inst;
+  return L->getLoopLatch()->getTerminator();
+}
+
 /// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
 /// given users to share.
 ///
@@ -1570,12 +1594,13 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
                                          SCEVHandle Stride,
                                          SCEVHandle CommonExprs,
                                          Value *CommonBaseV,
+                                         Instruction *IVIncInsertPt,
                                          const Loop *L,
                                          SCEVExpander &PreheaderRewriter) {
   DOUT << "  Inserting new PHI:\n";
 
   PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
-                                 Stride, L,
+                                 Stride, IVIncInsertPt, L,
                                  PreheaderRewriter);
 
   // Remember this in case a later stride is multiple of this.
@@ -1590,8 +1615,8 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(
   DOUT << "\n";
 }
 
-/// PrepareToStrengthReduceWithNewPhi - Prepare for the given users to reuse
-/// an induction variable with a stride that is a factor of the current
+/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
+/// reuse an induction variable with a stride that is a factor of the current
 /// induction variable.
 ///
 void
@@ -1727,6 +1752,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
   BasicBlock  *Preheader = L->getLoopPreheader();
   Instruction *PreInsertPt = Preheader->getTerminator();
   BasicBlock *LatchBlock = L->getLoopLatch();
+  Instruction *IVIncInsertPt = LatchBlock->getTerminator();
 
   Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
 
@@ -1755,13 +1781,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
                                     AllUsesAreOutsideLoop,
                                     Stride, ReuseIV, ReplacedTy,
                                     UsersToProcess);
-    if (isa<SCEVConstant>(RewriteFactor) &&
-        cast<SCEVConstant>(RewriteFactor)->isZero())
-      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
-                                        CommonBaseV, L, PreheaderRewriter);
-    else
+    if (!RewriteFactor->isZero())
       PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
                                                ReuseIV, PreInsertPt);
+    else {
+      IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
+      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
+                                        CommonBaseV, IVIncInsertPt,
+                                        L, PreheaderRewriter);
+    }
   }
 
   // Process all the users now, replacing their strided uses with
@@ -1800,7 +1828,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
     // FIXME: Use emitted users to emit other users.
     BasedUser &User = UsersToProcess.back();
 
-    DOUT << "    Examining use ";
+    DOUT << "    Examining ";
+    if (User.isUseOfPostIncrementedValue)
+      DOUT << "postinc";
+    else
+      DOUT << "preinc";
+    DOUT << " use ";
     DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
                          /*PrintType=*/false));
     DOUT << " in Inst: " << *(User.Inst);
@@ -1810,11 +1843,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
     Value *RewriteOp = User.Phi;
     if (User.isUseOfPostIncrementedValue) {
       RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
 
-      // If this user is in the loop, make sure it is the last thing in the
-      // loop to ensure it is dominated by the increment.
-      if (L->contains(User.Inst->getParent()))
-        User.Inst->moveBefore(LatchBlock->getTerminator());
+      // loop to ensure it is dominated by the increment. In case it's the
+      // only use of the iv, the increment instruction is already before the
+      // use.
+      if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+        User.Inst->moveBefore(IVIncInsertPt);
     }
 
     SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
@@ -2085,7 +2119,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
       // if it's likely the new stride uses will be rewritten using the
       // stride of the compare instruction.
      if (AllUsesAreAddresses &&
-          ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess))
+          ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
        continue;
 
       // If scale is negative, use swapped predicate unless it's testing
@@ -2304,8 +2338,8 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
     if (!DestTy) continue;
 
     if (TLI) {
-      /* If target does not support DestTy natively then do not apply
-         this transformation. */
+      // If target does not support DestTy natively then do not apply
+      // this transformation.
       MVT DVT = TLI->getValueType(DestTy);
       if (!TLI->isTypeLegal(DVT)) continue;
     }
@@ -2380,8 +2414,6 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
   // TODO: implement optzns here.
 
   OptimizeShadowIV(L);
-
-  OptimizeLoopTermCond(L);
 }
 
 /// OptimizeLoopTermCond - Change loop terminating condition to use the
@@ -2391,23 +2423,78 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
   // can, we want to change it to use a post-incremented version of its
   // induction variable, to allow coalescing the live ranges for the IV into
   // one register value.
-  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
-  BasicBlock  *Preheader = L->getLoopPreheader();
-  BasicBlock *LatchBlock =
-   SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
-  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-  if (!TermBr || TermBr->isUnconditional() ||
-      !isa<ICmpInst>(TermBr->getCondition()))
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  BasicBlock *ExitBlock = L->getExitingBlock();
+  if (!ExitBlock)
+    // Multiple exits, just look at the exit in the latch block if there is one.
+    ExitBlock = LatchBlock;
+  BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+  if (!TermBr)
+    return;
+  if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
     return;
-  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
 
   // Search IVUsesByStride to find Cond's IVUse if there is one.
   IVStrideUse *CondUse = 0;
   const SCEVHandle *CondStride = 0;
-
+  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
   if (!FindIVUserForCond(Cond, CondUse, CondStride))
     return; // setcc doesn't use the IV.
 
+  if (ExitBlock != LatchBlock) {
+    if (!Cond->hasOneUse())
+      // See below, we don't want the condition to be cloned.
+      return;
+
+    // If exiting block is the latch block, we know it's safe and profitable to
+    // transform the icmp to use post-inc iv. Otherwise do so only if it would
+    // not reuse another iv and its iv would be reused by other uses. We are
+    // optimizing for the case where the icmp is the only use of the iv.
+    IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride];
+    for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) {
+      if (StrideUses.Users[i].User == Cond)
+        continue;
+      if (!StrideUses.Users[i].isUseOfPostIncrementedValue)
+        return;
+    }
+
+    // FIXME: This is expensive, and worse still ChangeCompareStride does a
+    // similar check. Can we perform all the icmp related transformations after
+    // StrengthReduceStridedIVUsers?
+    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
+      int64_t SInt = SC->getValue()->getSExtValue();
+      for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee;
+           ++NewStride) {
+        std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI =
+          IVUsesByStride.find(StrideOrder[NewStride]);
+        if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
+          continue;
+        int64_t SSInt =
+          cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+        if (SSInt == SInt)
+          return; // This can definitely be reused.
+        if (unsigned(abs(SSInt)) < SInt || (SSInt % SInt) != 0)
+          continue;
+        int64_t Scale = SSInt / SInt;
+        bool AllUsesAreAddresses = true;
+        bool AllUsesAreOutsideLoop = true;
+        std::vector<BasedUser> UsersToProcess;
+        SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
+                                                AllUsesAreAddresses,
+                                                AllUsesAreOutsideLoop,
+                                                UsersToProcess);
+        // Avoid rewriting the compare instruction with an iv of new stride
+        // if it's likely the new stride uses will be rewritten using the
+        // stride of the compare instruction.
+        if (AllUsesAreAddresses &&
+            ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+          return;
+      }
+    }
+
+    StrideNoReuse.insert(*CondStride);
+  }
+
   // If the trip count is computed in terms of an smax (due to ScalarEvolution
   // being unable to find a sufficient guard, for example), change the loop
   // comparison to use SLT instead of NE.
@@ -2415,7 +2502,8 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 
   // If possible, change stride and operands of the compare instruction to
   // eliminate one stride.
-  Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+  if (ExitBlock == LatchBlock)
+    Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
 
   // It's possible for the setcc instruction to be anywhere in the loop, and
   // possible for it to have multiple users. If it is not immediately before
@@ -2431,7 +2519,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
 
       // Clone the IVUse, as the old use still exists!
       IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
-                                         CondUse->OperandValToReplace);
+                                          CondUse->OperandValToReplace);
       CondUse = &IVUsesByStride[*CondStride].Users.back();
     }
   }
@@ -2442,6 +2530,8 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
   CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
   CondUse->isUseOfPostIncrementedValue = true;
   Changed = true;
+
+  ++NumLoopCond;
 }
 
 // OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
@@ -2582,6 +2672,11 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
     // computation of some other indvar to decide when to terminate the loop.
     OptimizeIndvars(L);
 
+    // Change loop terminating condition to use the postinc iv when possible
+    // and optimize loop terminating compare. FIXME: Move this after
+    // StrengthReduceStridedIVUsers?
+    OptimizeLoopTermCond(L);
+
     // FIXME: We can shrink overlarge IV's here.  e.g. if the code has
     // computation in i64 values and the target doesn't support i64, demote
     // the computation to 32-bit if safe.
@@ -2616,6 +2711,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   IVUsesByStride.clear();
   IVsByStride.clear();
   StrideOrder.clear();
+  StrideNoReuse.clear();
 
   // Clean up after ourselves
   if (!DeadInsts.empty())
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
new file mode 100644
index 0000000..c998268
--- /dev/null
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -0,0 +1,134 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+
+@Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
+@Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
+@Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
+
+define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
+entry:
+	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%1 = getelementptr i32* %rk, i64 1		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
+	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
+	br label %bb
+
+bb:		; preds = %bb1, %entry
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]
+	%s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ]		; <i32> [#uses=2]
+	%s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ]		; <i32> [#uses=2]
+	%tmp18 = shl i64 %indvar, 4		; <i64> [#uses=4]
+	%rk26 = bitcast i32* %rk to i8*		; <i8*> [#uses=6]
+	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
+	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
+	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
+	%8 = and i32 %7, 255		; <i32> [#uses=1]
+	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
+	%12 = getelementptr i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
+	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
+	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%15 = xor i32 %11, %6		; <i32> [#uses=1]
+	%16 = xor i32 %15, %14		; <i32> [#uses=3]
+	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
+	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
+	%19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
+	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
+	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
+	%23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
+	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
+	%25 = getelementptr i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%28 = xor i32 %24, %20		; <i32> [#uses=1]
+	%29 = xor i32 %28, %27		; <i32> [#uses=4]
+	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
+	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
+	%32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
+	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb1:		; preds = %bb
+	%ctg2.sum31 = add i64 %tmp18, 16		; <i64> [#uses=1]
+	%34 = getelementptr i8* %rk26, i64 %ctg2.sum31		; <i8*> [#uses=1]
+	%35 = bitcast i8* %34 to i32*		; <i32*> [#uses=1]
+	%36 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%37 = and i32 %36, 255		; <i32> [#uses=1]
+	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
+	%39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%42 = xor i32 %40, %33		; <i32> [#uses=1]
+	%43 = xor i32 %42, %41		; <i32> [#uses=1]
+	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
+	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
+	%46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = and i32 %16, 255		; <i32> [#uses=1]
+	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
+	%50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
+	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
+	%52 = getelementptr i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
+	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
+	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%55 = xor i32 %51, %47		; <i32> [#uses=1]
+	%56 = xor i32 %55, %54		; <i32> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br label %bb
+
+bb2:		; preds = %bb
+	%tmp10 = shl i64 %tmp.16, 4		; <i64> [#uses=2]
+	%ctg2.sum = add i64 %tmp10, 16		; <i64> [#uses=1]
+	%tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum		; <i8*> [#uses=1]
+	%57 = bitcast i8* %tmp1213 to i32*		; <i32*> [#uses=1]
+	%58 = and i32 %33, -16777216		; <i32> [#uses=1]
+	%59 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%60 = and i32 %59, 255		; <i32> [#uses=1]
+	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
+	%62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
+	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
+	%65 = or i32 %64, %58		; <i32> [#uses=1]
+	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%67 = xor i32 %65, %66		; <i32> [#uses=2]
+	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
+	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
+	%70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
+	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
+	%73 = and i32 %16, 255		; <i32> [#uses=1]
+	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
+	%75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
+	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
+	%78 = or i32 %77, %72		; <i32> [#uses=1]
+	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
+	%79 = getelementptr i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
+	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%82 = xor i32 %78, %81		; <i32> [#uses=2]
+	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
+	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
+	store i8 %84, i8* %out, align 1
+	%85 = lshr i32 %67, 16		; <i32> [#uses=1]
+	%86 = trunc i32 %85 to i8		; <i8> [#uses=1]
+	%87 = getelementptr i8* %out, i64 1		; <i8*> [#uses=1]
+	store i8 %86, i8* %87, align 1
+	%88 = getelementptr i8* %out, i64 4		; <i8*> [#uses=1]
+	%89 = lshr i32 %82, 24		; <i32> [#uses=1]
+	%90 = trunc i32 %89 to i8		; <i8> [#uses=1]
+	store i8 %90, i8* %88, align 1
+	%91 = lshr i32 %82, 16		; <i32> [#uses=1]
+	%92 = trunc i32 %91 to i8		; <i8> [#uses=1]
+	%93 = getelementptr i8* %out, i64 5		; <i8*> [#uses=1]
+	store i8 %92, i8* %93, align 1
+	ret void
+}
diff --git a/test/CodeGen/X86/lsr-negative-stride.ll b/test/CodeGen/X86/lsr-negative-stride.ll
index 43b507b..28d041f 100644
--- a/test/CodeGen/X86/lsr-negative-stride.ll
+++ b/test/CodeGen/X86/lsr-negative-stride.ll
@@ -16,7 +16,7 @@
 ;}
 
 
-define i32 @t(i32 %a, i32 %b) {
+define i32 @t(i32 %a, i32 %b) nounwind {
 entry:
 	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
 	br i1 %tmp1434, label %bb17, label %bb.outer
diff --git a/test/CodeGen/X86/remat-mov0.ll b/test/CodeGen/X86/remat-mov-1.ll
index 360628c..98b7bb4 100644
--- a/test/CodeGen/X86/remat-mov0.ll
+++ b/test/CodeGen/X86/remat-mov-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
 
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }