Do not add CSE-ed instructions into the visited map, because we don't want to consider them as candidates for replacing instructions that are yet to be visited.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184966 91177308-0d34-0410-b5e6-96231b3b80d8
author: Nadav Rotem <nrotem@apple.com> 2013-06-26 16:54:53 +0000
committer: Nadav Rotem <nrotem@apple.com> 2013-06-26 16:54:53 +0000
commit: 29acf7e03af9b5524daa1e7523e0296cc766ff24 (patch)
tree: 97c116b8abc4ca1848f3fa2d4a0c5580042f196c
parent: c19bd321362166805194cbaf170e06a4790d2da9 (diff)
download: external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.zip
external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.tar.gz
external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.tar.bz2
2 files changed, 93 insertions, 5 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9c8244b..bb37994 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1258,6 +1258,8 @@ Value *FuncSLP::vectorizeArith(ArrayRef<Value *> Operands) {
   for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
     Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
     Operands[i]->replaceAllUsesWith(S);
+    Instruction *I = cast<Instruction>(Operands[i]);
+    I->eraseFromParent();
   }
 
   return Vec;
@@ -1280,7 +1282,7 @@ void FuncSLP::optimizeGatherSequence() {
     // Check if it has a preheader.
     BasicBlock *PreHeader = L->getLoopPreheader();
     if (!PreHeader)
-      return;
+      continue;
 
     // If the vector or the element that we insert into it are
     // instructions that are defined in this basic block then we can't
@@ -1310,17 +1312,19 @@ void FuncSLP::optimizeGatherSequence() {
       if (!Insert || !GatherSeq.count(Insert))
         continue;
 
-     // Check if we can replace this instruction with any of the
-     // visited instructions.
+      // Check if we can replace this instruction with any of the
+      // visited instructions.
       for (SmallPtrSet<Instruction*, 16>::iterator v = Visited.begin(),
            ve = Visited.end(); v != ve; ++v) {
         if (Insert->isIdenticalTo(*v) &&
-          DT->dominates((*v)->getParent(), Insert->getParent())) {
+            DT->dominates((*v)->getParent(), Insert->getParent())) {
           Insert->replaceAllUsesWith(*v);
+          Insert = 0;
           break;
         }
       }
-      Visited.insert(Insert);
+      if (Insert)
+        Visited.insert(Insert);
     }
   }
 }
diff --git a/test/Transforms/SLPVectorizer/X86/cse.ll b/test/Transforms/SLPVectorizer/X86/cse.ll
index d286798..1c1e4b5 100644
--- a/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -134,3 +134,87 @@ define i32 @test2(double* nocapture %G, i32 %k) {
   ret i32 undef
 }
 
+
+;int foo(double *A, int n) {
+;  A[0] = A[0] * 7.9 * n + 6.0;
+;  A[1] = A[1] * 7.9 * n + 6.0;
+;  A[2] = A[2] * 7.9 * n + 6.0;
+;  A[3] = A[3] * 7.9 * n + 6.0;
+;}
+;CHECK: @foo4
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @foo4(double* nocapture %A, i32 %n) {
+entry:
+  %0 = load double* %A, align 8
+  %mul = fmul double %0, 7.900000e+00
+  %conv = sitofp i32 %n to double
+  %mul1 = fmul double %conv, %mul
+  %add = fadd double %mul1, 6.000000e+00
+  store double %add, double* %A, align 8
+  %arrayidx3 = getelementptr inbounds double* %A, i64 1
+  %1 = load double* %arrayidx3, align 8
+  %mul4 = fmul double %1, 7.900000e+00
+  %mul6 = fmul double %conv, %mul4
+  %add7 = fadd double %mul6, 6.000000e+00
+  store double %add7, double* %arrayidx3, align 8
+  %arrayidx9 = getelementptr inbounds double* %A, i64 2
+  %2 = load double* %arrayidx9, align 8
+  %mul10 = fmul double %2, 7.900000e+00
+  %mul12 = fmul double %conv, %mul10
+  %add13 = fadd double %mul12, 6.000000e+00
+  store double %add13, double* %arrayidx9, align 8
+  %arrayidx15 = getelementptr inbounds double* %A, i64 3
+  %3 = load double* %arrayidx15, align 8
+  %mul16 = fmul double %3, 7.900000e+00
+  %mul18 = fmul double %conv, %mul16
+  %add19 = fadd double %mul18, 6.000000e+00
+  store double %add19, double* %arrayidx15, align 8
+  ret i32 undef
+}
+
+;int partial_mrg(double *A, int n) {
+;  A[0] = A[0] * n;
+;  A[1] = A[1] * n;
+;  if (n < 4) return 0;
+;  A[2] = A[2] * n;
+;  A[3] = A[3] * (n+4);
+;}
+;CHECK: @partial_mrg
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK: insertelement <2 x double>
+;CHECK-NOT: insertelement <2 x double>
+;CHECK: ret
+define i32 @partial_mrg(double* nocapture %A, i32 %n) {
+entry:
+  %0 = load double* %A, align 8
+  %conv = sitofp i32 %n to double
+  %mul = fmul double %conv, %0
+  store double %mul, double* %A, align 8
+  %arrayidx2 = getelementptr inbounds double* %A, i64 1
+  %1 = load double* %arrayidx2, align 8
+  %mul4 = fmul double %conv, %1
+  store double %mul4, double* %arrayidx2, align 8
+  %cmp = icmp slt i32 %n, 4
+  br i1 %cmp, label %return, label %if.end
+
+if.end:                                           ; preds = %entry
+  %arrayidx7 = getelementptr inbounds double* %A, i64 2
+  %2 = load double* %arrayidx7, align 8
+  %mul9 = fmul double %conv, %2
+  store double %mul9, double* %arrayidx7, align 8
+  %arrayidx11 = getelementptr inbounds double* %A, i64 3
+  %3 = load double* %arrayidx11, align 8
+  %add = add nsw i32 %n, 4
+  %conv12 = sitofp i32 %add to double
+  %mul13 = fmul double %conv12, %3
+  store double %mul13, double* %arrayidx11, align 8
+  br label %return
+
+return:                                           ; preds = %entry, %if.end
+  ret i32 0
+}
+
author	Nadav Rotem <nrotem@apple.com>	2013-06-26 16:54:53 +0000
committer	Nadav Rotem <nrotem@apple.com>	2013-06-26 16:54:53 +0000
commit	29acf7e03af9b5524daa1e7523e0296cc766ff24 (patch)
tree	97c116b8abc4ca1848f3fa2d4a0c5580042f196c
parent	c19bd321362166805194cbaf170e06a4790d2da9 (diff)
download	external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.zip external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.tar.gz external_llvm-29acf7e03af9b5524daa1e7523e0296cc766ff24.tar.bz2