aboutsummaryrefslogtreecommitdiffstats
path: root/test/Transforms/SLPVectorizer
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-07-07 06:57:07 +0000
committerNadav Rotem <nrotem@apple.com>2013-07-07 06:57:07 +0000
commit369cc938d261de3295eb70d0738f54ef1a82806c (patch)
tree10497d00e5f4f7a2cd0a9d4b741da0d448787f10 /test/Transforms/SLPVectorizer
parent95a1b3484d7daf7830161f1613fc812303641abe (diff)
downloadexternal_llvm-369cc938d261de3295eb70d0738f54ef1a82806c.zip
external_llvm-369cc938d261de3295eb70d0738f54ef1a82806c.tar.gz
external_llvm-369cc938d261de3295eb70d0738f54ef1a82806c.tar.bz2
SLPVectorizer: Implement DCE as part of vectorization.
This is a complete re-write of the bottom-up vectorization class. Before this commit we scanned the instruction tree 3 times. First in search of merge points for the trees. Second, for estimating the cost. And finally for vectorization. There was a lot of code duplication and adding the DCE exposed bugs. The new design is simpler and DCE was a part of the design. In this implementation we build the tree once. After that we estimate the cost by scanning the different entries in the constructed tree (in any order). The vectorization phase also works on the built tree. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185774 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/SLPVectorizer')
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_7zip.ll38
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_bullet.ll38
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_bullet2.ll38
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_dequeue.ll40
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_flop7.ll46
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_lame.ll24
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_lencod.ll66
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_lencod2.ll23
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll53
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_rc4.ll28
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll113
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_smallpt.ll65
-rw-r--r--test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll46
-rw-r--r--test/Transforms/SLPVectorizer/X86/diamond.ll6
-rw-r--r--test/Transforms/SLPVectorizer/X86/long_chains.ll13
-rw-r--r--test/Transforms/SLPVectorizer/X86/saxpy.ll16
16 files changed, 645 insertions, 8 deletions
diff --git a/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
new file mode 100644
index 0000000..51b1c08
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334 = type { %struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333, i16*, i8*, i8*, i32, i32, i64, i64, i32, i32, i32, [4 x i32], i32, i32, i32, i32, i32, [20 x i8] }
+%struct._CLzmaProps.0.27.54.81.102.123.144.165.180.195.228.258.333 = type { i32, i32, i32, i32 }
+
+define fastcc void @LzmaDec_DecodeReal2(%struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p) { ; two parallel i32 chains (range/code) pass through phis, selects and subs around a loop and end in stores to fields 4 and 5 of %p
+entry:
+ %range20.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 4 ; address of struct field 4 (an i32)
+ %code21.i = getelementptr inbounds %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* %p, i64 0, i32 5 ; address of struct field 5 (an i32), the field right after field 4
+ br label %do.body66.i
+
+do.body66.i: ; preds = %do.cond.i, %entry
+ %range.2.i = phi i32 [ %range.4.i, %do.cond.i ], [ undef, %entry ]
+ %code.2.i = phi i32 [ %code.4.i, %do.cond.i ], [ undef, %entry ]
+ %.range.2.i = select i1 undef, i32 undef, i32 %range.2.i
+ %.code.2.i = select i1 undef, i32 undef, i32 %code.2.i
+ br i1 undef, label %do.cond.i, label %if.else.i
+
+if.else.i: ; preds = %do.body66.i
+ %sub91.i = sub i32 %.range.2.i, undef
+ %sub92.i = sub i32 %.code.2.i, undef
+ br label %do.cond.i
+
+do.cond.i: ; preds = %if.else.i, %do.body66.i
+ %range.4.i = phi i32 [ %sub91.i, %if.else.i ], [ undef, %do.body66.i ] ; incoming value from %do.body66.i is undef
+ %code.4.i = phi i32 [ %sub92.i, %if.else.i ], [ %.code.2.i, %do.body66.i ] ; unlike the range phi, the %do.body66.i incoming value is the select result
+ br i1 undef, label %do.body66.i, label %do.end1006.i
+
+do.end1006.i: ; preds = %do.cond.i
+ %.range.4.i = select i1 undef, i32 undef, i32 %range.4.i
+ %.code.4.i = select i1 undef, i32 undef, i32 %code.4.i
+ store i32 %.range.4.i, i32* %range20.i, align 4 ; first of two stores to neighbouring i32 fields
+ store i32 %.code.4.i, i32* %code21.i, align 4 ; second store, to the following field
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
new file mode 100644
index 0000000..565905d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960" = type { i32, i32 }
+
+define void @_ZN23btGeneric6DofConstraint8getInfo1EPN17btTypedConstraint17btConstraintInfo1E(%"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* nocapture %info) { ; increments one i32 field and decrements the other of a two-field struct, twice, with phis between the two store pairs
+entry:
+ br i1 undef, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ ret void
+
+if.else: ; preds = %entry
+ %m_numConstraintRows4 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 0 ; address of field 0
+ %nub5 = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* %info, i64 0, i32 1 ; address of field 1
+ br i1 undef, label %land.lhs.true.i.1, label %if.then7.1
+
+land.lhs.true.i.1: ; preds = %if.else
+ br i1 undef, label %for.inc.1, label %if.then7.1
+
+if.then7.1: ; preds = %land.lhs.true.i.1, %if.else
+ %inc.1 = add nsw i32 0, 1 ; both operands are constants
+ store i32 %inc.1, i32* %m_numConstraintRows4, align 4
+ %dec.1 = add nsw i32 6, -1 ; both operands are constants
+ store i32 %dec.1, i32* %nub5, align 4
+ br label %for.inc.1
+
+for.inc.1: ; preds = %if.then7.1, %land.lhs.true.i.1
+ %0 = phi i32 [ %dec.1, %if.then7.1 ], [ 6, %land.lhs.true.i.1 ] ; feeds %dec.2, which is stored to field 1
+ %1 = phi i32 [ %inc.1, %if.then7.1 ], [ 0, %land.lhs.true.i.1 ] ; feeds %inc.2, which is stored to field 0 (phis appear in the opposite order to the stores)
+ %inc.2 = add nsw i32 %1, 1
+ store i32 %inc.2, i32* %m_numConstraintRows4, align 4
+ %dec.2 = add nsw i32 %0, -1
+ store i32 %dec.2, i32* %nub5, align 4
+ unreachable
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_bullet2.ll b/test/Transforms/SLPVectorizer/X86/crash_bullet2.ll
new file mode 100644
index 0000000..df026d1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_bullet2.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332 = type { float, [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [3 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector4.7.32.67.92.117.142.177.187.262.282.331, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, %class.btVector3.5.30.65.90.115.140.175.185.260.280.330, [4 x float], float, float, [4 x float], float, float, [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330], [16 x %class.btVector3.5.30.65.90.115.140.175.185.260.280.330] }
+%class.btVector3.5.30.65.90.115.140.175.185.260.280.330 = type { [4 x float] }
+%class.btVector4.7.32.67.92.117.142.177.187.262.282.331 = type { %class.btVector3.5.30.65.90.115.140.175.185.260.280.330 }
+
+define void @_ZN30GIM_TRIANGLE_CALCULATION_CACHE18triangle_collisionERK9btVector3S2_S2_fS2_S2_S2_fR25GIM_TRIANGLE_CONTACT_DATA(%class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this) { ; two fadd/fsub/fsub chains whose results are stored to neighbouring float elements; one chain also stores an intermediate through an undef pointer
+entry:
+ %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1 ; float element 1 of the first vector in field 2
+ %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2 ; float element 2 of the same vector
+ %0 = load float* %arrayidx36, align 4
+ %add587 = fadd float undef, undef
+ %sub600 = fsub float %add587, undef
+ store float %sub600, float* undef, align 4 ; store through an undef pointer, placed between the two chains
+ %sub613 = fsub float %add587, %sub600
+ store float %sub613, float* %arrayidx26, align 4 ; store to element 1
+ %add626 = fadd float %0, undef
+ %sub639 = fsub float %add626, undef
+ %sub652 = fsub float %add626, %sub639
+ store float %sub652, float* %arrayidx36, align 4 ; store to element 2, the same address %0 was loaded from
+ br i1 undef, label %if.else1609, label %if.then1595
+
+if.then1595: ; preds = %entry
+ br i1 undef, label %return, label %for.body.lr.ph.i.i1702
+
+for.body.lr.ph.i.i1702: ; preds = %if.then1595
+ unreachable
+
+if.else1609: ; preds = %entry
+ unreachable
+
+return: ; preds = %if.then1595
+ ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
new file mode 100644
index 0000000..ce01590
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731" = type { double*, double*, double*, double** }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) { ; loads pointer fields from both iterators, merges them through pointer-typed phis and stores them into fields 0 and 1 of %__first
+entry:
+ %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0 ; field 0 of %__first
+ %0 = load double** %_M_cur2.i.i, align 8
+ %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1 ; field 1 of %__first (only stored to, never loaded here)
+ %_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0 ; field 0 of %__last
+ %1 = load double** %_M_cur2.i.i81, align 8
+ %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1 ; field 1 of %__last
+ %2 = load double** %_M_first3.i.i83, align 8
+ br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
+
+while.cond.i.preheader: ; preds = %entry
+ br label %while.cond.i
+
+while.cond.i: ; preds = %while.body.i, %while.cond.i.preheader
+ br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.body.i
+
+while.body.i: ; preds = %while.cond.i
+ br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i
+
+_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit: ; preds = %while.body.i, %while.cond.i, %entry
+ %3 = phi double* [ %2, %entry ], [ %2, %while.cond.i ], [ undef, %while.body.i ] ; pointer-typed phi, stored to field 1 below
+ %4 = phi double* [ %0, %entry ], [ %1, %while.cond.i ], [ undef, %while.body.i ] ; pointer-typed phi, stored to field 0 below
+ store double* %4, double** %_M_cur2.i.i, align 8 ; store to field 0 of %__first
+ store double* %3, double** %_M_first3.i.i, align 8 ; store to field 1 of %__first
+ br i1 undef, label %if.then.i55, label %while.cond
+
+if.then.i55: ; preds = %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %if.then.i55, %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit
+ br label %while.cond ; unconditional self-loop; this function has no ret
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_flop7.ll b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
new file mode 100644
index 0000000..e11be48
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_flop7.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @main() #0 { ; contains no loads or stores; the only arithmetic is a double-precision chain in %for.body267 feeding a loop-carried phi
+entry:
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %entry
+ unreachable
+
+while.end: ; preds = %entry
+ br i1 undef, label %for.end80, label %for.body75.lr.ph
+
+for.body75.lr.ph: ; preds = %while.end
+ br label %for.body75
+
+for.body75: ; preds = %for.body75, %for.body75.lr.ph
+ br label %for.body75
+
+for.end80: ; preds = %while.end
+ br i1 undef, label %for.end300, label %for.body267.lr.ph
+
+for.body267.lr.ph: ; preds = %for.end80
+ br label %for.body267
+
+for.body267: ; preds = %for.body267, %for.body267.lr.ph
+ %s.71010 = phi double [ 0.000000e+00, %for.body267.lr.ph ], [ %add297, %for.body267 ] ; loop-carried accumulator, starts at 0.0
+ %mul269 = fmul double undef, undef ; shared by %mul270 and %mul283
+ %mul270 = fmul double %mul269, %mul269
+ %add282 = fadd double undef, undef
+ %mul283 = fmul double %mul269, %add282 ; numerator of %div296
+ %add293 = fadd double undef, undef
+ %mul294 = fmul double %mul270, %add293
+ %add295 = fadd double undef, %mul294 ; denominator of %div296
+ %div296 = fdiv double %mul283, %add295
+ %add297 = fadd double %s.71010, %div296 ; only user is the %s.71010 phi above
+ br i1 undef, label %for.body267, label %for.end300
+
+for.end300: ; preds = %for.body267, %for.end80
+ unreachable
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/crash_lame.ll b/test/Transforms/SLPVectorizer/X86/crash_lame.ll
new file mode 100644
index 0000000..cfc3fa3
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_lame.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define fastcc void @dct36(double* %inbuf) #0 { ; computes inbuf[2] = inbuf[1] + undef and inbuf[1] = inbuf[0] + inbuf[1], with the first load reused by both adds
+entry:
+ %arrayidx41 = getelementptr inbounds double* %inbuf, i64 2 ; &inbuf[2]
+ %arrayidx44 = getelementptr inbounds double* %inbuf, i64 1 ; &inbuf[1]
+ %0 = load double* %arrayidx44, align 8, !tbaa !0 ; inbuf[1], used by both %add46 and %add49
+ %add46 = fadd double %0, undef
+ store double %add46, double* %arrayidx41, align 8, !tbaa !0 ; write inbuf[2]
+ %1 = load double* %inbuf, align 8, !tbaa !0 ; inbuf[0], loaded after the store to inbuf[2]
+ %add49 = fadd double %1, %0
+ store double %add49, double* %arrayidx44, align 8, !tbaa !0 ; write inbuf[1], i.e. the stores go in descending address order
+ ret void
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
new file mode 100644
index 0000000..b35a5d7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @RCModelEstimator() { ; no memory accesses; two identically shaped fmul/fmul/fsub/fdiv chains in %if.then88 feed two phis in %if.end103 that have no users
+entry:
+ br i1 undef, label %for.body.lr.ph, label %for.end.thread
+
+for.end.thread: ; preds = %entry
+ unreachable
+
+for.body.lr.ph: ; preds = %entry
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %for.body.lr.ph
+ br i1 undef, label %for.body3, label %if.end103
+
+for.cond14.preheader: ; preds = %for.inc11
+ br i1 undef, label %for.body16.lr.ph, label %if.end103
+
+for.body16.lr.ph: ; preds = %for.cond14.preheader
+ br label %for.body16
+
+for.body3: ; preds = %for.inc11, %for.end
+ br i1 undef, label %if.then7, label %for.inc11
+
+if.then7: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %if.then7, %for.body3
+ br i1 false, label %for.cond14.preheader, label %for.body3 ; constant-false condition: always branches back to %for.body3
+
+for.body16: ; preds = %for.body16, %for.body16.lr.ph
+ br i1 undef, label %for.end39, label %for.body16
+
+for.end39: ; preds = %for.body16
+ br i1 undef, label %if.end103, label %for.cond45.preheader
+
+for.cond45.preheader: ; preds = %for.end39
+ br i1 undef, label %if.then88, label %if.else
+
+if.then88: ; preds = %for.cond45.preheader
+ %mul89 = fmul double 0.000000e+00, 0.000000e+00 ; first chain: all fmul operands are the constant 0.0
+ %mul90 = fmul double 0.000000e+00, 0.000000e+00
+ %sub91 = fsub double %mul89, %mul90
+ %div92 = fdiv double %sub91, undef ; result of the first chain, feeds phi %1
+ %mul94 = fmul double 0.000000e+00, 0.000000e+00 ; second chain, same shape as the first
+ %mul95 = fmul double 0.000000e+00, 0.000000e+00
+ %sub96 = fsub double %mul94, %mul95
+ %div97 = fdiv double %sub96, undef ; result of the second chain, feeds phi %0
+ br label %if.end103
+
+if.else: ; preds = %for.cond45.preheader
+ br label %if.end103
+
+if.end103: ; preds = %if.else, %if.then88, %for.end39, %for.cond14.preheader, %for.end
+ %0 = phi double [ 0.000000e+00, %for.end39 ], [ %div97, %if.then88 ], [ 0.000000e+00, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ] ; no users in this function
+ %1 = phi double [ undef, %for.end39 ], [ %div92, %if.then88 ], [ undef, %if.else ], [ 0.000000e+00, %for.cond14.preheader ], [ 0.000000e+00, %for.end ] ; no users; mixes undef and 0.0 incoming values
+ ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_lencod2.ll b/test/Transforms/SLPVectorizer/X86/crash_lencod2.ll
new file mode 100644
index 0000000..d1e719c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_lencod2.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @intrapred_luma() #0 { ; stores one truncated i16 value to elements 12, 11 and 10 of a [13 x i16] array reached through an undef pointer
+entry:
+ %conv153 = trunc i32 undef to i16 ; the single value written by all three stores
+ %arrayidx154 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 12 ; element 12
+ store i16 %conv153, i16* %arrayidx154, align 8, !tbaa !0
+ %arrayidx155 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 11 ; element 11
+ store i16 %conv153, i16* %arrayidx155, align 2, !tbaa !0
+ %arrayidx156 = getelementptr inbounds [13 x i16]* undef, i64 0, i64 10 ; element 10
+ store i16 %conv153, i16* %arrayidx156, align 4, !tbaa !0 ; three stores in total, in descending index order, with alignments 8, 2 and 4
+ ret void
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"short", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
new file mode 100644
index 0000000..b3ca235
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @main() { ; nested loops with no memory accesses; the innermost loop carries two double values through phis and recomputes them in %if.end
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.end44, %entry
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %if.then25, %for.body
+ br label %for.body6
+
+for.body6: ; preds = %for.inc21, %for.cond4.preheader
+ br label %for.body12
+
+for.body12: ; preds = %if.end, %for.body6
+ %fZImg.069 = phi double [ undef, %for.body6 ], [ %add19, %if.end ] ; loop-carried value, updated by %add19
+ %fZReal.068 = phi double [ undef, %for.body6 ], [ %add20, %if.end ] ; loop-carried value, updated by %add20
+ %mul13 = fmul double %fZReal.068, %fZReal.068 ; square of %fZReal.068
+ %mul14 = fmul double %fZImg.069, %fZImg.069 ; square of %fZImg.069
+ %add15 = fadd double %mul13, %mul14
+ %cmp16 = fcmp ogt double %add15, 4.000000e+00 ; the only branch condition in this function that is not undef
+ br i1 %cmp16, label %for.inc21, label %if.end
+
+if.end: ; preds = %for.body12
+ %mul18 = fmul double undef, %fZImg.069
+ %add19 = fadd double undef, %mul18 ; next value of %fZImg.069
+ %sub = fsub double %mul13, %mul14 ; reuses both squares computed in %for.body12
+ %add20 = fadd double undef, %sub ; next value of %fZReal.068
+ br i1 undef, label %for.body12, label %for.inc21
+
+for.inc21: ; preds = %if.end, %for.body12
+ br i1 undef, label %for.end23, label %for.body6
+
+for.end23: ; preds = %for.inc21
+ br i1 undef, label %if.then25, label %if.then26
+
+if.then25: ; preds = %for.end23
+ br i1 undef, label %for.end44, label %for.cond4.preheader
+
+if.then26: ; preds = %for.end23
+ unreachable
+
+for.end44: ; preds = %if.then25
+ br i1 undef, label %for.end48, label %for.body
+
+for.end48: ; preds = %for.end44
+ ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_rc4.ll b/test/Transforms/SLPVectorizer/X86/crash_rc4.ll
new file mode 100644
index 0000000..2037470
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_rc4.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.rc4_state.0.24 = type { i32, i32, [256 x i32] }
+
+define void @rc4_crypt(%struct.rc4_state.0.24* nocapture %s) { ; two masked i32 values from a loop reach the exit block through phis and are stored to fields 0 and 1 of %s
+entry:
+ %x1 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 0 ; address of field 0 (i32)
+ %y2 = getelementptr inbounds %struct.rc4_state.0.24* %s, i64 0, i32 1 ; address of field 1 (i32)
+ br i1 undef, label %for.body, label %for.end
+
+for.body: ; preds = %for.body, %entry
+ %x.045 = phi i32 [ %conv4, %for.body ], [ undef, %entry ] ; has no users in this function
+ %conv4 = and i32 undef, 255
+ %conv7 = and i32 undef, 255
+ %idxprom842 = zext i32 %conv7 to i64 ; has no users in this function
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %x.0.lcssa = phi i32 [ undef, %entry ], [ %conv4, %for.body ]
+ %y.0.lcssa = phi i32 [ undef, %entry ], [ %conv7, %for.body ]
+ store i32 %x.0.lcssa, i32* %x1, align 4 ; store to field 0
+ store i32 %y.0.lcssa, i32* %y2, align 4 ; store to field 1
+ ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
new file mode 100644
index 0000000..0541545
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171 = type { i32, i32, i32, i32, i32, i32, [8 x i8] }
+
+define void @SIM4() { ; large CFG around two parallel i32 chains (row/col) whose results are stored to fields 1 and 0 of a struct reached through an undef pointer
+entry:
+ br i1 undef, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %lor.lhs.false
+ br i1 undef, label %for.end605, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %if.end
+ br label %for.body
+
+for.body: ; preds = %for.inc603, %for.body.lr.ph
+ br i1 undef, label %for.inc603, label %if.end12
+
+if.end12: ; preds = %for.body
+ br i1 undef, label %land.lhs.true, label %land.lhs.true167
+
+land.lhs.true: ; preds = %if.end12
+ br i1 undef, label %if.then17, label %land.lhs.true167
+
+if.then17: ; preds = %land.lhs.true
+ br i1 undef, label %if.end98, label %land.rhs.lr.ph
+
+land.rhs.lr.ph: ; preds = %if.then17
+ unreachable
+
+if.end98: ; preds = %if.then17
+ %from299 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 1 ; address of field 1, based on an undef pointer
+ br i1 undef, label %land.lhs.true167, label %if.then103
+
+if.then103: ; preds = %if.end98
+ %.sub100 = select i1 undef, i32 250, i32 undef
+ %mul114 = shl nsw i32 %.sub100, 2
+ %from1115 = getelementptr inbounds %struct._exon_t.12.103.220.363.480.649.740.857.1039.1065.1078.1091.1117.1130.1156.1169.1195.1221.1234.1286.1299.1312.1338.1429.1455.1468.1494.1520.1884.1897.1975.2066.2105.2170.2171* undef, i64 0, i32 0 ; address of field 0, computed in a different block from %from299
+ %cond125 = select i1 undef, i32 undef, i32 %mul114
+ br label %for.cond.i
+
+for.cond.i: ; preds = %land.rhs.i874, %if.then103
+ %row.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %.sub100, %if.then103 ]
+ %col.0.i = phi i32 [ undef, %land.rhs.i874 ], [ %cond125, %if.then103 ]
+ br i1 undef, label %land.rhs.i874, label %for.end.i
+
+land.rhs.i874: ; preds = %for.cond.i
+ br i1 undef, label %for.cond.i, label %for.end.i
+
+for.end.i: ; preds = %land.rhs.i874, %for.cond.i
+ br i1 undef, label %if.then.i, label %if.end.i
+
+if.then.i: ; preds = %for.end.i
+ %add14.i = add nsw i32 %row.0.i, undef
+ %add15.i = add nsw i32 %col.0.i, undef
+ br label %extend_bw.exit
+
+if.end.i: ; preds = %for.end.i
+ %add16.i = add i32 %cond125, %.sub100 ; extra user of both select results, outside the row/col chains
+ %cmp26514.i = icmp slt i32 %add16.i, 0
+ br i1 %cmp26514.i, label %for.end33.i, label %for.body28.lr.ph.i
+
+for.body28.lr.ph.i: ; preds = %if.end.i
+ br label %for.end33.i
+
+for.end33.i: ; preds = %for.body28.lr.ph.i, %if.end.i
+ br i1 undef, label %for.end58.i, label %for.body52.lr.ph.i
+
+for.body52.lr.ph.i: ; preds = %for.end33.i
+ br label %for.end58.i
+
+for.end58.i: ; preds = %for.body52.lr.ph.i, %for.end33.i
+ br label %while.cond260.i
+
+while.cond260.i: ; preds = %land.rhs263.i, %for.end58.i
+ br i1 undef, label %land.rhs263.i, label %while.end275.i
+
+land.rhs263.i: ; preds = %while.cond260.i
+ br i1 undef, label %while.cond260.i, label %while.end275.i
+
+while.end275.i: ; preds = %land.rhs263.i, %while.cond260.i
+ br label %extend_bw.exit
+
+extend_bw.exit: ; preds = %while.end275.i, %if.then.i
+ %add14.i1262 = phi i32 [ %add14.i, %if.then.i ], [ undef, %while.end275.i ]
+ %add15.i1261 = phi i32 [ %add15.i, %if.then.i ], [ undef, %while.end275.i ]
+ br i1 false, label %if.then157, label %land.lhs.true167 ; constant-false condition: the edge to %if.then157 is never taken
+
+if.then157: ; preds = %extend_bw.exit
+ %add158 = add nsw i32 %add14.i1262, 1
+ store i32 %add158, i32* %from299, align 4 ; store to field 1
+ %add160 = add nsw i32 %add15.i1261, 1
+ store i32 %add160, i32* %from1115, align 4 ; store to field 0
+ br label %land.lhs.true167
+
+land.lhs.true167: ; preds = %if.then157, %extend_bw.exit, %if.end98, %land.lhs.true, %if.end12
+ unreachable
+
+for.inc603: ; preds = %for.body
+ br i1 undef, label %for.body, label %for.end605
+
+for.end605: ; preds = %for.inc603, %if.end
+ unreachable
+
+return: ; preds = %lor.lhs.false, %entry
+ ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
new file mode 100644
index 0000000..ac7e412
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.Ray.5.11.53.113.119.137.149.185.329.389.415 = type { %struct.Vec.0.6.48.108.114.132.144.180.324.384.414, %struct.Vec.0.6.48.108.114.132.144.180.324.384.414 }
+%struct.Vec.0.6.48.108.114.132.144.180.324.384.414 = type { double, double, double }
+
+; Function Attrs: ssp uwtable
+define void @main() #0 { ; paired double fadd/fmul chains in %cond.false66.us are stored to four struct fields whose addresses are computed earlier in %arrayctor.cont
+entry:
+ br i1 undef, label %cond.true, label %cond.end
+
+cond.true: ; preds = %entry
+ unreachable
+
+cond.end: ; preds = %entry
+ br label %invoke.cont
+
+invoke.cont: ; preds = %invoke.cont, %cond.end
+ br i1 undef, label %arrayctor.cont, label %invoke.cont
+
+arrayctor.cont: ; preds = %invoke.cont
+ %agg.tmp99208.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 0 ; first Vec member, double field 0
+ %agg.tmp99208.sroa.1.8.idx388 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 0, i32 1 ; first Vec member, double field 1
+ %agg.tmp101211.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 0 ; second Vec member, double field 0
+ %agg.tmp101211.sroa.1.8.idx390 = getelementptr inbounds %struct.Ray.5.11.53.113.119.137.149.185.329.389.415* undef, i64 0, i32 1, i32 1 ; second Vec member, double field 1
+ br label %for.cond36.preheader
+
+for.cond36.preheader: ; preds = %_Z5clampd.exit.1, %arrayctor.cont
+ br i1 undef, label %for.body42.lr.ph.us, label %_Z5clampd.exit.1
+
+cond.false51.us: ; preds = %for.body42.lr.ph.us
+ unreachable
+
+cond.true48.us: ; preds = %for.body42.lr.ph.us
+ br i1 undef, label %cond.true63.us, label %cond.false66.us
+
+cond.false66.us: ; preds = %cond.true48.us
+ %add.i276.us = fadd double 0.000000e+00, undef ; extra fadd that only the first chain has
+ %add.i264.us = fadd double %add.i276.us, 0.000000e+00 ; first chain
+ %add4.i267.us = fadd double undef, 0xBFA5CC2D1960285F ; second chain
+ %mul.i254.us = fmul double %add.i264.us, 1.400000e+02
+ %mul2.i256.us = fmul double %add4.i267.us, 1.400000e+02
+ %add.i243.us = fadd double %mul.i254.us, 5.000000e+01
+ %add4.i246.us = fadd double %mul2.i256.us, 5.200000e+01
+ %mul.i.i.us = fmul double undef, %add.i264.us ; second user of %add.i264.us
+ %mul2.i.i.us = fmul double undef, %add4.i267.us ; second user of %add4.i267.us
+ store double %add.i243.us, double* %agg.tmp99208.sroa.0.0.idx, align 8 ; stores 1 and 2 go to fields 0 and 1 of the first Vec
+ store double %add4.i246.us, double* %agg.tmp99208.sroa.1.8.idx388, align 8
+ store double %mul.i.i.us, double* %agg.tmp101211.sroa.0.0.idx, align 8 ; stores 3 and 4 go to fields 0 and 1 of the second Vec
+ store double %mul2.i.i.us, double* %agg.tmp101211.sroa.1.8.idx390, align 8
+ unreachable
+
+cond.true63.us: ; preds = %cond.true48.us
+ unreachable
+
+for.body42.lr.ph.us: ; preds = %for.cond36.preheader
+ br i1 undef, label %cond.true48.us, label %cond.false51.us
+
+_Z5clampd.exit.1: ; preds = %for.cond36.preheader
+ br label %for.cond36.preheader
+}
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll b/test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll
new file mode 100644
index 0000000..84c7b3a
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_smallpt2.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601 = type { %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600, %struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 }
+%struct.Vec.0.6.48.90.132.186.192.198.234.252.258.264.270.276.282.288.378.432.438.450.456.594.600 = type { double, double, double }
+
+; Function Attrs: ssp uwtable -- reduced reproducer; this file's RUN line has no FileCheck stage, so the test only requires opt to complete.
+define void @_Z8radianceRK3RayiPt() #0 {
+entry:
+ br i1 undef, label %if.then78, label %if.then38 ; condition is undef; only %if.then38 contains computation
+
+if.then38: ; preds = %entry
+ %mul.i.i790 = fmul double undef, undef ; used only by %mul6.i770 (second chain)
+ %mul3.i.i792 = fmul double undef, undef ; shared by both chains: used by %mul.i764 and %mul9.i772
+ %mul.i764 = fmul double undef, %mul3.i.i792 ; first chain starts here (ends in the first store)
+ %mul4.i767 = fmul double undef, undef
+ %sub.i768 = fsub double %mul.i764, %mul4.i767
+ %mul6.i770 = fmul double undef, %mul.i.i790 ; second chain starts here (ends in the second store)
+ %mul9.i772 = fmul double undef, %mul3.i.i792
+ %sub10.i773 = fsub double %mul6.i770, %mul9.i772
+ %mul.i736 = fmul double undef, %sub.i768 ; from here on the two chains alternate, one instruction each with the same opcode
+ %mul2.i738 = fmul double undef, %sub10.i773
+ %mul.i727 = fmul double undef, %mul.i736
+ %mul2.i729 = fmul double undef, %mul2.i738
+ %add.i716 = fadd double undef, %mul.i727
+ %add4.i719 = fadd double undef, %mul2.i729
+ %add.i695 = fadd double undef, %add.i716
+ %add4.i698 = fadd double undef, %add4.i719
+ %mul.i.i679 = fmul double undef, %add.i695 ; final value of the first chain
+ %mul2.i.i680 = fmul double undef, %add4.i698 ; final value of the second chain
+ %agg.tmp74663.sroa.0.0.idx = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0 ; first double of the struct's second Vec member; base pointer is undef
+ store double %mul.i.i679, double* %agg.tmp74663.sroa.0.0.idx, align 8
+ %agg.tmp74663.sroa.1.8.idx943 = getelementptr inbounds %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 1 ; second double of the same Vec member, i.e. the field right after the one stored above
+ store double %mul2.i.i680, double* %agg.tmp74663.sroa.1.8.idx943, align 8 ; NOTE(review): presumably this pair of stores to adjacent doubles is what the SLP vectorizer seeds from -- confirm
+ br label %return
+
+if.then78: ; preds = %entry
+ br label %return ; empty path straight to the exit
+
+return: ; preds = %if.then78, %if.then38
+ ret void
+}
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
index 008f09d..2a237ea 100644
--- a/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -50,7 +50,8 @@ entry:
; }
; CHECK: @extr_user
-; CHECK: load i32*
+; CHECK: load <4 x i32>
+; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32>
; CHECK-NEXT: ret
define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
@@ -79,7 +80,8 @@ entry:
; In this example we have an external user that is not the first element in the vector.
; CHECK: @extr_user1
-; CHECK: load i32*
+; CHECK: load <4 x i32>
+; CHECK-NEXT: extractelement <4 x i32>
; CHECK: store <4 x i32>
; CHECK-NEXT: ret
define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
diff --git a/test/Transforms/SLPVectorizer/X86/long_chains.ll b/test/Transforms/SLPVectorizer/X86/long_chains.ll
index 0a2ace3..5af3e6d 100644
--- a/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -3,12 +3,13 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
+; At this point we can't vectorize only parts of the tree.
+
; CHECK: test
-; CHECK: sitofp i8
-; CHECK-NEXT: sitofp i8
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: fmul <2 x double>
+; CHECK: insertelement <2 x i8>
+; CHECK: insertelement <2 x i8>
+; CHECK: sitofp <2 x i8>
+; CHECK: fmul <2 x double>
; CHECK: ret
define i32 @test(double* nocapture %A, i8* nocapture %B) {
entry:
@@ -18,7 +19,7 @@ entry:
%add = add i8 %0, 3
%add4 = add i8 %1, 3
%conv6 = sitofp i8 %add to double
- %conv7 = sitofp i8 %add4 to double ; <--- This is inefficient. The chain stops here.
+ %conv7 = sitofp i8 %add4 to double
%mul = fmul double %conv6, %conv6
%add8 = fadd double %mul, 1.000000e+00
%mul9 = fmul double %conv7, %conv7
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
index b520913..4626341 100644
--- a/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -43,3 +43,19 @@ define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a,
ret void
}
+; Make sure we don't crash on this one: two scalar load/add/store sequences at indices %i+1 and %i+2, where each add takes undef as its first operand.
+define void @SAXPY_crash(i32* noalias nocapture %x, i32* noalias nocapture %y, i64 %i) {
+ %1 = add i64 %i, 1 ; index used by the first sequence
+ %2 = getelementptr inbounds i32* %x, i64 %1 ; &x[i+1]
+ %3 = getelementptr inbounds i32* %y, i64 %1 ; &y[i+1]
+ %4 = load i32* %3, align 4
+ %5 = add nsw i32 undef, %4 ; undef + y[i+1]
+ store i32 %5, i32* %2, align 4 ; written to x[i+1]
+ %6 = add i64 %i, 2 ; index used by the second sequence
+ %7 = getelementptr inbounds i32* %x, i64 %6 ; &x[i+2]
+ %8 = getelementptr inbounds i32* %y, i64 %6 ; &y[i+2]
+ %9 = load i32* %8, align 4
+ %10 = add nsw i32 undef, %9 ; undef + y[i+2]
+ store i32 %10, i32* %7, align 4 ; written to x[i+2], the element right after the first store
+ ret void
+}