diff options
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 23 | ||||
-rw-r--r-- | test/Transforms/SLPVectorizer/X86/phi3.ll | 35 |
2 files changed, 48 insertions, 10 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 60d294b..9312b4b 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1401,30 +1401,33 @@ void BoUpSLP::vectorizeTree() { Value *Vec = E->VectorizedValue; assert(Vec && "Can't find vectorizable value"); + Value *Lane = Builder.getInt32(it->Lane); // Generate extracts for out-of-tree users. // Find the insertion point for the extractelement lane. - Instruction *Loc = 0; if (PHINode *PN = dyn_cast<PHINode>(Vec)) { - Loc = PN->getParent()->getFirstInsertionPt(); + Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } else if (isa<Instruction>(Vec)){ if (PHINode *PH = dyn_cast<PHINode>(User)) { for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) { if (PH->getIncomingValue(i) == Scalar) { - Loc = PH->getIncomingBlock(i)->getTerminator(); - break; + Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + PH->setOperand(i, Ex); } } - assert(Loc && "Unable to find incoming value for the PHI"); } else { - Loc = cast<Instruction>(User); + Builder.SetInsertPoint(cast<Instruction>(User)); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } } else { - Loc = F->getEntryBlock().begin(); + Builder.SetInsertPoint(F->getEntryBlock().begin()); + Value *Ex = Builder.CreateExtractElement(Vec, Lane); + User->replaceUsesOfWith(Scalar, Ex); } - Builder.SetInsertPoint(Loc); - Value *Ex = Builder.CreateExtractElement(Vec, Builder.getInt32(it->Lane)); - User->replaceUsesOfWith(Scalar, Ex); DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n"); } diff --git a/test/Transforms/SLPVectorizer/X86/phi3.ll b/test/Transforms/SLPVectorizer/X86/phi3.ll new file mode 100644 index 0000000..fd8d361 --- /dev/null +++ b/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.GPar.0.16.26 = type { [0 x double], double } + +@d = external global double, align 8 + +declare %struct.GPar.0.16.26* @Rf_gpptr(...) + +define void @Rf_GReset() { +entry: + %sub = fsub double -0.000000e+00, undef + %0 = load double* @d, align 8 + %sub1 = fsub double -0.000000e+00, %0 + br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7 + +if.then: ; preds = %entry + %sub2 = fsub double %sub, undef + %div.i = fdiv double %sub2, undef + %sub4 = fsub double %sub1, undef + %div.i16 = fdiv double %sub4, undef + %cmp = fcmp ogt double %div.i, %div.i16 + br i1 %cmp, label %if.then6, label %if.end7 + +if.then6: ; preds = %if.then + br label %if.end7 + +if.end7: ; preds = %if.then6, %if.then, %entry + %g.0 = phi double [ 0.000000e+00, %if.then6 ], [ %sub, %if.then ], [ %sub, %entry ] + ret void +} + + |