diff options
author | Stephen Hines <srhines@google.com> | 2014-12-04 19:51:48 +0000 |
---|---|---|
committer | Android Git Automerger <android-git-automerger@android.com> | 2014-12-04 19:51:48 +0000 |
commit | a21bbdfad461e957fa42ac9d6860ddc9de2da3e9 (patch) | |
tree | 8d32ff2094b47e15a8def30d62fd7dee6e009de3 /test/Transforms/LoopVectorize | |
parent | 6b8c6a5088c221af2b25065b8b6b8b0fec8a116f (diff) | |
parent | 876d6995443e99d13696f3941c3a789a4daa7c7a (diff) | |
download | external_llvm-a21bbdfad461e957fa42ac9d6860ddc9de2da3e9.zip external_llvm-a21bbdfad461e957fa42ac9d6860ddc9de2da3e9.tar.gz external_llvm-a21bbdfad461e957fa42ac9d6860ddc9de2da3e9.tar.bz2 |
am 876d6995: Merge "Update aosp/master LLVM for rebase to r222494."
* commit '876d6995443e99d13696f3941c3a789a4daa7c7a':
Update aosp/master LLVM for rebase to r222494.
Diffstat (limited to 'test/Transforms/LoopVectorize')
98 files changed, 945 insertions, 174 deletions
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 1e1396f..a292afb 100644 --- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll index aa7cc0e..b3eae69 100644 --- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll +++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce ; Check that we don't fall into an infinite loop. define void @test() nounwind { diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index ae9f998..16d64ea 100644 --- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4 ; Check that we don't crash. diff --git a/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll new file mode 100644 index 0000000..a01d543 --- /dev/null +++ b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -loop-vectorize -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.anon = type { [100 x i32], i32, [100 x i32] } + +@Foo = common global %struct.anon zeroinitializer, align 4 + +; CHECK-LABEL: @foo( +; CHECK: load <4 x i32>* +; CHECK: sdiv <4 x i32> +; CHECK: store <4 x i32> + +define void @foo(){ +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %0, 2 + %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll index fce3b70..9c69ba8 100644 --- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -41,6 +41,6 @@ for.end: ; preds = %for.body ; Now, we check for the Hint metadata ; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]} ; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} -; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.interleave.count", i32 1} ; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]} diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll new file mode 100644 index 0000000..a94e24d --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/assume.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 { +entry: + br label %for.body + +; CHECK-LABEL: @test1 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp1 = fcmp ogt float %0, 1.000000e+02 + tail call void @llvm.assume(i1 %cmp1) + %add = fadd float %0, 1.000000e+00 + %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Function Attrs: nounwind +declare void @llvm.assume(i1) #1 + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +%struct.data = type { float*, float* } + +; Function Attrs: nounwind uwtable +define void @test2(%struct.data* nocapture readonly %d) #0 { +entry: + %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1 + %0 = load float** %b, align 8 + %ptrint = ptrtoint float* %0 to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0 + %1 = load float** %a, align 8 + %ptrint2 = ptrtoint float* %1 to i64 + %maskedptr3 = and i64 %ptrint2, 31 + %maskcond4 = icmp eq i64 %maskedptr3, 0 + br label %for.body + +; CHECK-LABEL: @test2 +; CHECK: vector.body: +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: @llvm.assume +; CHECK: for.body: +; CHECK: ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + tail call void @llvm.assume(i1 %maskcond) + %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv + %2 = load float* %arrayidx, align 4 + %add = fadd float %2, 1.000000e+00 + tail call void @llvm.assume(i1 %maskcond4) + %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv + store float %add, float* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 1599 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll index e1113fd..05403cd 100644 --- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-interleave=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index d6120e7..0650d94 100644 --- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index 8716cff..fd69dc4 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index 2c47fcb..0b542a9 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index 7e156a9..b580d73 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll new file mode 100644 index 0000000..054da8e --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/powof2div.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.anon = type { [100 x i32], i32, [100 x i32] } + +@Foo = common global %struct.anon zeroinitializer, align 4 + +;CHECK-LABEL: @foo( +;CHECK: load <4 x i32>* +;CHECK: sdiv <4 x i32> +;CHECK: store <4 x i32> + +define void @foo(){ +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %0, 2 + %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv + store i32 %div, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll index dfa4faa..f9a0281 100644 --- a/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/test/Transforms/LoopVectorize/X86/small-size.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll index 6b38bac..a4ec694 100644 --- a/test/Transforms/LoopVectorize/X86/tripcount.ll +++ b/test/Transforms/LoopVectorize/X86/tripcount.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mcpu=prescott < %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386-unknown-freebsd11.0" diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll index d5024bb..716dc08 100644 --- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll +++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll @@ -1,6 +1,6 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S \ ; RUN: | FileCheck %s --check-prefix=CHECK-VECTOR -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-interleave=0 -dce -S \ ; RUN: | FileCheck %s --check-prefix=CHECK-SCALAR target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll index 2d7b663..c684b4e 100644 --- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll +++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll index 59bb8d0..e57cfef 100644 --- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll +++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s +; RUN: opt -basicaa -loop-vectorize -mcpu=corei7-avx -debug -S < %s 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll index 6cdd29b..7bce11d 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -24,10 +24,11 @@ ; File, line, and column should match those specified in the metadata ; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations -; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified +; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info ; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds -; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled +; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info +; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization ; CHECK: _Z4testPii ; CHECK-NOT: x i32> @@ -121,40 +122,40 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!9, !10} !llvm.ident = !{!11} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4, metadata !7, metadata !8} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} -!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10} -!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{metadata !"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled] +!8 = metadata !{metadata !"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds] !9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !11 = metadata !{metadata !"clang version 3.5.0"} !12 = metadata !{i32 3, i32 8, metadata !13, null} -!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!13 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !14 = metadata !{metadata !14, metadata !15, metadata !15} !15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} !16 = metadata !{i32 4, i32 5, metadata !17, null} -!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1} +!17 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !13} ; [ DW_TAG_lexical_block ] !18 = metadata !{metadata !19, metadata !19, i64 0} !19 = metadata !{metadata !"int", metadata !20, i64 0} !20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0} !21 = metadata !{metadata !"Simple C/C++ TBAA"} !22 = metadata !{i32 5, i32 9, metadata !23, null} -!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2} +!23 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 8, i32 1, metadata !4, null} !25 = metadata !{i32 12, i32 8, metadata !26, null} -!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3} +!26 = metadata !{metadata !"0xb\0012\003\000", metadata !1, metadata !7} ; [ DW_TAG_lexical_block ] !27 = metadata !{metadata !27, metadata !28, metadata !29} -!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +!28 = metadata !{metadata !"llvm.loop.interleave.count", i32 1} !29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} !30 = metadata !{i32 13, i32 5, metadata !26, null} !31 = metadata !{i32 14, i32 1, metadata !7, null} !32 = metadata !{i32 18, i32 8, metadata !33, null} -!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4} +!33 = metadata !{metadata !"0xb\0018\003\000", metadata !1, metadata !8} ; [ DW_TAG_lexical_block ] !34 = metadata !{metadata !34, metadata !15} !35 = metadata !{i32 19, i32 5, metadata !33, null} !36 = metadata !{i32 20, i32 1, metadata !8, null} diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll index f683447..14e541a 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s ; This code has all the !dbg annotations needed to track source line information, ; but is missing the llvm.dbg.cu annotation. This prevents code generation from @@ -52,23 +52,23 @@ declare void @ibar(i32*) #1 !1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] -!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!4 = metadata !{metadata !"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", metadata !1, metadata !5, metadata !6, null, i32 (i32)* @foo, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} -!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0 "} -!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!10 = metadata !{i32 8, i32 3, metadata !4, null} !11 = metadata !{metadata !12, metadata !12, i64 0} !12 = metadata !{metadata !"int", metadata !13, i64 0} !13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0} !14 = metadata !{metadata !"Simple C/C++ TBAA"} !15 = metadata !{i32 17, i32 8, metadata !16, null} -!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] -!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!16 = metadata !{metadata !"0xb\0017\008\002", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!17 = metadata !{metadata !"0xb\0017\008\001", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!18 = metadata !{metadata !"0xb\0017\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] !19 = metadata !{i32 18, i32 5, metadata !20, null} -!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!20 = metadata !{metadata !"0xb\0017\0027\000", metadata !1, metadata !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] !21 = metadata !{metadata !13, metadata !13, i64 0} !22 = metadata !{i32 20, i32 3, metadata !4, null} !23 = metadata !{i32 21, i32 3, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index efc93d9..d8e5403 100644 --- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -1,4 +1,4 @@ -; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s +; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.7.0" diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll index a099daa..cab333d 100644 --- a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll +++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S -mtriple=xcore | FileCheck %s target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32" target triple = "xcore" diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll index 84b0361..f2fb8b9 100644 --- a/test/Transforms/LoopVectorize/align.ll +++ b/test/Transforms/LoopVectorize/align.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll index 7b71272..7a3e798 100644 --- a/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll index 2648bbe..d7cbad0 100644 --- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll +++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll index 7e79916..5f441f3 100644 --- a/test/Transforms/LoopVectorize/calloc.ll +++ b/test/Transforms/LoopVectorize/calloc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll index 255ce9c..4f92d33 100644 --- a/test/Transforms/LoopVectorize/cast-induction.ll +++ b/test/Transforms/LoopVectorize/cast-induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; rdar://problem/12848162 diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll new file mode 100644 index 0000000..50fa329 --- /dev/null +++ b/test/Transforms/LoopVectorize/conditional-assignment.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Function Attrs: nounwind ssp uwtable +define void @conditional_store(i32* noalias nocapture %indices) #0 { +entry: + br label %for.body, !dbg !10 + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12 + %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14 + %cmp1 = icmp eq i32 %0, 1024, !dbg !12 + br i1 %cmp1, label %if.then, label %for.inc, !dbg !12 + +if.then: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14 + br label %for.inc, !dbg !18 + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10 + br i1 %exitcond, label %for.end, label %for.body, !dbg !10 + +for.end: ; preds = %for.inc + ret void, !dbg !19 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{metadata !"0x11\0012\00clang version 3.6.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !"source.c", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !"0x2e\00conditional_store\00conditional_store\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*)* @conditional_store, null, null, metadata !2} ; [ DW_TAG_subprogram ] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} +!9 = metadata !{metadata !"clang version 3.6.0"} +!10 = metadata !{i32 2, i32 8, metadata !11, null} +!11 = metadata !{metadata !"0xb\002\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 3, i32 9, metadata !13, null} +!13 = metadata !{metadata !"0xb\003\009\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] +!14 = metadata !{metadata !15, metadata !15, i64 0} +!15 = metadata !{metadata !"int", metadata !16, i64 0} +!16 = metadata !{metadata !"omnipotent char", metadata !17, i64 0} +!17 = metadata !{metadata !"Simple C/C++ TBAA"} +!18 = metadata !{i32 3, i32 29, metadata !13, null} +!19 = metadata !{i32 4, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll index e4ba77f..452b7ae 100644 --- a/test/Transforms/LoopVectorize/control-flow.ll +++ b/test/Transforms/LoopVectorize/control-flow.ll @@ -11,7 +11,7 @@ ; } ; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer -; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified +; CHECK: remark: source.cpp:5:9: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info ; CHECK: _Z4testPii ; CHECK-NOT: x i32> @@ -55,21 +55,21 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [test] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0"} !10 = metadata !{i32 3, i32 8, metadata !11, null} -!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 5, i32 9, metadata !13, null} -!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2} -!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1} +!13 = metadata !{metadata !"0xb\005\009\000", metadata !1, metadata !14} ; [ DW_TAG_lexical_block ] +!14 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] !15 = metadata !{metadata !16, metadata !16, i64 0} !16 = metadata !{metadata !"int", metadata !17, i64 0} !17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0} diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll index c8215a1..f32f610 100644 --- a/test/Transforms/LoopVectorize/cpp-new-array.ll +++ b/test/Transforms/LoopVectorize/cpp-new-array.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll index 2497b25..91d07d4 100644 --- a/test/Transforms/LoopVectorize/dbg.value.ll +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s +; RUN: opt < %s -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine | FileCheck %s ; Make sure we vectorize with debugging turned on. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: @test( define i32 @test() #0 { entry: - tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18 + tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9, metadata !{}), !dbg !18 br label %for.body, !dbg !18 for.body: @@ -25,7 +25,7 @@ for.body: %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19 store i32 %add, i32* %arrayidx4, align 4, !dbg !19 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18 - tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18 + tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9, metadata !{}), !dbg !18 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18 %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18 br i1 %exitcond, label %for.body, label %for.end, !dbg !18 @@ -34,9 +34,9 @@ for.end: ret i32 0, !dbg !24 } -declare void @llvm.dbg.declare(metadata, metadata) #1 +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -declare void @llvm.dbg.value(metadata, i64, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" } attributes #1 = { nounwind readnone } @@ -44,27 +44,27 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!26} -!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""} +!0 = metadata !{metadata !"0x11\004\00clang\001\00\000\00\000", metadata !25, metadata !1, metadata !1, metadata !2, metadata !11, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !2 = metadata !{metadata !3} -!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5} -!4 = metadata !{i32 786473, metadata !25} -!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!3 = metadata !{metadata !"0x2e\00test\00test\00test\005\000\001\000\006\00256\001\005", metadata !25, metadata !4, metadata !5, null, i32 ()* @test, null, null, metadata !8} ; [ DW_TAG_subprogram ] +!4 = metadata !{metadata !"0x29", metadata !25} ; [ DW_TAG_file_type ] +!5 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} -!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} +!7 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0} -!10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0} +!9 = metadata !{metadata !"0x100\00i\006\000", metadata !10, metadata !4, metadata !7} ; [ DW_TAG_auto_variable ] +!10 = metadata !{metadata !"0xb\006\000\000", metadata !25, metadata !3} ; [ DW_TAG_lexical_block ] !11 = metadata !{metadata !12, metadata !16, metadata !17} -!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null} -!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int] +!12 = metadata !{metadata !"0x34\00A\00A\00\001\000\001", null, metadata !4, metadata !13, [1024 x i32]* @A, null} ; [ DW_TAG_variable ] +!13 = metadata !{metadata !"0x1\00\000\0032768\0032\000\000", null, null, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int] !14 = metadata !{metadata !15} !15 = metadata !{i32 786465, i64 0, i64 1024} -!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null} -!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} +!16 = metadata !{metadata !"0x34\00B\00B\00\002\000\001", null, metadata !4, metadata !13, [1024 x i32]* @B, null} ; [ DW_TAG_variable ] +!17 = metadata !{metadata !"0x34\00C\00C\00\003\000\001", null, metadata !4, metadata !13, [1024 x i32]* @C, null} ; [ DW_TAG_variable ] !18 = metadata !{i32 6, i32 0, metadata !10, null} !19 = metadata !{i32 7, i32 0, metadata !20, null} -!20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1} +!20 = metadata !{metadata !"0xb\006\000\001", metadata !25, metadata !10} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 9, i32 0, metadata !3, null} !25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"} -!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll index bf0b418..6350296 100644 --- a/test/Transforms/LoopVectorize/debugloc.ll +++ b/test/Transforms/LoopVectorize/debugloc.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s +; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -19,10 +19,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @f(i32* nocapture %a, i32 %size) #0 { entry: - tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19 - tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19 - tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20 - tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21 + tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13, metadata !{}), !dbg !19 + tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14, metadata !{}), !dbg !19 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15, metadata !{}), !dbg !20 + tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16, metadata !{}), !dbg !21 %cmp4 = icmp eq i32 %size, 0, !dbg !21 br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21 @@ -35,9 +35,9 @@ for.body: ; preds = %for.body.lr.ph, %fo %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22 %0 = load i32* %arrayidx, align 4, !dbg !22 %add = add i32 %0, %sum.05, !dbg !22 - tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22 + tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15, metadata !{}), !dbg !22 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21 - tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21 + tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16, metadata !{}), !dbg !21 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21 %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21 br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21 @@ -52,10 +52,10 @@ for.end: ; preds = %entry, %for.cond.fo } ; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata) #1 +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 ; Function Attrs: nounwind readnone -declare void @llvm.dbg.value(metadata, i64, metadata) #1 +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" } attributes #1 = { nounwind readnone } @@ -63,28 +63,28 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!18, !27} -!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99] +!0 = metadata !{metadata !"0x11\0012\00clang version 3.4 (trunk 185038) (llvm/trunk 185097)\001\00\000\00\000", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99] !1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} !2 = metadata !{i32 0} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f] +!4 = metadata !{metadata !"0x2e\00f\00f\00\003\000\001\000\006\00256\001\003", metadata !5, metadata !6, metadata !7, null, i32 (i32*, i32)* @f, null, null, metadata !12} ; [ DW_TAG_subprogram ] [line 3] [def] [f] !5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"} -!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] -!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{metadata !"0x29", metadata !5} ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] +!7 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !10, metadata !11} -!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] +!9 = metadata !{metadata !"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{metadata !"0xf\00\000\0064\0064\000\000", null, null, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!11 = metadata !{metadata !"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned] !12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16} -!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3] -!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3] -!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4] -!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5] -!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] +!13 = metadata !{metadata !"0x101\00a\0016777219\000", metadata !4, metadata !6, metadata !10} ; [ DW_TAG_arg_variable ] [a] [line 3] +!14 = metadata !{metadata !"0x101\00size\0033554435\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_arg_variable ] [size] [line 3] +!15 = metadata !{metadata !"0x100\00sum\004\000", metadata !4, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [sum] [line 4] +!16 = metadata !{metadata !"0x100\00i\005\000", metadata !17, metadata !6, metadata !11} ; [ DW_TAG_auto_variable ] [i] [line 5] +!17 = metadata !{metadata !"0xb\005\000\000", metadata !5, metadata !4} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>] !18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3} !19 = metadata !{i32 3, i32 0, metadata !4, null} !20 = metadata !{i32 4, i32 0, metadata !4, null} !21 = metadata !{i32 5, i32 0, metadata !17, null} !22 = metadata !{i32 6, i32 0, metadata !17, null} !26 = metadata !{i32 7, i32 0, metadata !4, null} -!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 2} diff --git a/test/Transforms/LoopVectorize/duplicated-metadata.ll b/test/Transforms/LoopVectorize/duplicated-metadata.ll new file mode 100644 index 0000000..8353dca --- /dev/null +++ b/test/Transforms/LoopVectorize/duplicated-metadata.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This test makes sure we don't duplicate the loop vectorizer's metadata +; while marking them as already vectorized (by setting width = 1), even +; at lower optimization levels, where no extra cleanup is done + +define void @_Z3fooPf(float* %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv + %p = load float* %arrayidx, align 4 + %mul = fmul float %p, 2.000000e+00 + store float %mul, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: ; preds = %for.body + ret void +} + +!0 = metadata !{metadata !0, metadata !1} +!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 4} +; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4} +; CHECK: !{metadata !"llvm.loop.interleave.count", i32 1} diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll index 8a4f8ce..a3c0bb8 100644 --- a/test/Transforms/LoopVectorize/ee-crash.ll +++ b/test/Transforms/LoopVectorize/ee-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/exact.ll b/test/Transforms/LoopVectorize/exact.ll new file mode 100644 index 0000000..0a8fbf3 --- /dev/null +++ b/test/Transforms/LoopVectorize/exact.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; CHECK-LABEL: @lshr_exact( +; CHECK: lshr exact <4 x i32> +define void @lshr_exact(i32* %x) { +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %x, i64 %iv + %0 = load i32* %arrayidx, align 4 + %conv1 = lshr exact i32 %0, 1 + store i32 %conv1, i32* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll index 21d0937..0fc55c8 100644 --- a/test/Transforms/LoopVectorize/flags.ll +++ b/test/Transforms/LoopVectorize/flags.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll index 0dfbab0..0f064ee 100644 --- a/test/Transforms/LoopVectorize/float-reduction.ll +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -23,3 +23,25 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret float %add } + +;CHECK-LABEL: @foosub( +;CHECK: fsub fast <4 x float> +;CHECK: ret +define float @foosub(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ] + %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %sub = fsub fast float %sum.04, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret float %sub +} diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll index f1f068c..e03534f 100644 --- a/test/Transforms/LoopVectorize/funcall.ll +++ b/test/Transforms/LoopVectorize/funcall.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index d8959d4..6c8af0b 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll index d64d67f..3f11ce8 100644 --- a/test/Transforms/LoopVectorize/global_alias.ll +++ b/test/Transforms/LoopVectorize/global_alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll index 765e14d..d0b27f1 100644 --- a/test/Transforms/LoopVectorize/hoist-loads.ll +++ b/test/Transforms/LoopVectorize/hoist-loads.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll index 2a0e826..90e3ec0 100644 --- a/test/Transforms/LoopVectorize/i8-induction.ll +++ b/test/Transforms/LoopVectorize/i8-induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll index f8f2cf1..67910bf 100644 --- a/test/Transforms/LoopVectorize/if-conv-crash.ll +++ b/test/Transforms/LoopVectorize/if-conv-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll index 92cb06e..b5ac8fc 100644 --- a/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll index 8cb703c..455699c 100644 --- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll +++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll index 6e3e8ed..9e18528 100644 --- a/test/Transforms/LoopVectorize/if-conversion.ll +++ b/test/Transforms/LoopVectorize/if-conversion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll index 7b0e181..c6067e0 100644 --- a/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll new file mode 100644 index 0000000..624ee7e --- /dev/null +++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll @@ -0,0 +1,142 @@ +; This test is based on one of benchmarks from SPEC2006. It exposes a bug with +; incorrect updating of the dom-tree. +; RUN: opt < %s -loop-vectorize -verify-dom-info +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@PL_utf8skip = external constant [0 x i8] + +; Function Attrs: nounwind ssp uwtable +define void @Perl_pp_quotemeta() #0 { + %len = alloca i64, align 8 + br i1 undef, label %2, label %1 + +; <label>:1 ; preds = %0 + br label %3 + +; <label>:2 ; preds = %0 + br label %3 + +; <label>:3 ; preds = %2, %1 + br i1 undef, label %34, label %4 + +; <label>:4 ; preds = %3 + br i1 undef, label %5, label %6 + +; <label>:5 ; preds = %4 + br label %6 + +; <label>:6 ; preds = %5, %4 + br i1 undef, label %7, label %8 + +; <label>:7 ; preds = %6 + br label %8 + +; <label>:8 ; preds = %7, %6 + br i1 undef, label %.preheader, label %9 + +.preheader: ; preds = %9, %8 + br i1 undef, label %.loopexit, label %.lr.ph + +; <label>:9 ; preds = %8 + br i1 undef, label %thread-pre-split.preheader, label %.preheader + +thread-pre-split.preheader: ; preds = %9 + br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21 + +.thread-pre-split.loopexit_crit_edge: ; preds = %19 + %scevgep.sum = xor i64 %umax, -1 + %scevgep45 = getelementptr i8* %d.020, i64 %scevgep.sum + br label %thread-pre-split.loopexit + +thread-pre-split.loopexit: ; preds = %11, %.thread-pre-split.loopexit_crit_edge + %d.1.lcssa = phi i8* [ %scevgep45, %.thread-pre-split.loopexit_crit_edge ], [ %d.020, %11 ] + br i1 false, label %thread-pre-split._crit_edge, label %.lr.ph21 + +.lr.ph21: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader + %d.020 = phi i8* [ undef, %26 ], [ %d.1.lcssa, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] + %10 = phi i64 [ %28, %26 ], [ undef, %thread-pre-split.loopexit ], [ undef, %thread-pre-split.preheader ] + br i1 undef, label %11, label %22 + +; <label>:11 ; preds = %.lr.ph21 + %12 = getelementptr inbounds [0 x i8]* @PL_utf8skip, i64 0, i64 undef + %13 = load i8* %12, align 1 + %14 = zext i8 %13 to i64 + %15 = icmp ugt i64 %14, %10 + %. = select i1 %15, i64 %10, i64 %14 + br i1 undef, label %thread-pre-split.loopexit, label %.lr.ph28 + +.lr.ph28: ; preds = %11 + %16 = xor i64 %10, -1 + %17 = xor i64 %14, -1 + %18 = icmp ugt i64 %16, %17 + %umax = select i1 %18, i64 %16, i64 %17 + br label %19 + +; <label>:19 ; preds = %19, %.lr.ph28 + %ulen.126 = phi i64 [ %., %.lr.ph28 ], [ %20, %19 ] + %20 = add i64 %ulen.126, -1 + %21 = icmp eq i64 %20, 0 + br i1 %21, label %.thread-pre-split.loopexit_crit_edge, label %19 + +; <label>:22 ; preds = %.lr.ph21 + br i1 undef, label %26, label %23 + +; <label>:23 ; preds = %22 + br i1 undef, label %26, label %24 + +; <label>:24 ; preds = %23 + br i1 undef, label %26, label %25 + +; <label>:25 ; preds = %24 + br label %26 + +; <label>:26 ; preds = %25, %24, %23, %22 + %27 = load i64* %len, align 8 + %28 = add i64 %27, -1 + br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21 + +thread-pre-split._crit_edge: ; preds = %26, %thread-pre-split.loopexit, %thread-pre-split.preheader + br label %.loopexit + +.lr.ph: ; preds = %33, %.preheader + br i1 undef, label %29, label %thread-pre-split5 + +; <label>:29 ; preds = %.lr.ph + br i1 undef, label %33, label %30 + +; <label>:30 ; preds = %29 + br i1 undef, label %33, label %31 + +thread-pre-split5: ; preds = %.lr.ph + br i1 undef, label %33, label %31 + +; <label>:31 ; preds = %thread-pre-split5, %30 + br i1 undef, label %33, label %32 + +; <label>:32 ; preds = %31 + br label %33 + +; <label>:33 ; preds = %32, %31, %thread-pre-split5, %30, %29 + br i1 undef, label %.loopexit, label %.lr.ph + +.loopexit: ; preds = %33, %thread-pre-split._crit_edge, %.preheader + br label %35 + +; <label>:34 ; preds = %3 + br label %35 + +; <label>:35 ; preds = %34, %.loopexit + br i1 undef, label %37, label %36 + +; <label>:36 ; preds = %35 + br label %37 + +; <label>:37 ; preds = %36, %35 + ret void +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = metadata !{metadata !"clang version 3.6.0 "} diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll index 71bedb7..067a76b 100644 --- a/test/Transforms/LoopVectorize/increment.ll +++ b/test/Transforms/LoopVectorize/increment.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index 7dabcb2..3f34918 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -28,7 +28,7 @@ for.end: ret void } -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND ; Make sure we remove unneeded vectorization of induction variables. ; In order for instcombine to cleanup the vectorized induction variables that we diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll index 9c8201a..ce64c5b 100644 --- a/test/Transforms/LoopVectorize/induction_plus.ll +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index 7dfaf03..d48731a 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -1192,3 +1192,59 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } + +declare float @llvm.minnum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @minnum_f32( +;CHECK: llvm.minnum.v4f32 +;CHECK: ret void +define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4 + %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare float @llvm.maxnum.f32(float, float) nounwind readnone + +;CHECK-LABEL: @maxnum_f32( +;CHECK: llvm.maxnum.v4f32 +;CHECK: ret void +define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4 + %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone + %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv + store float %call, float* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll index de6be54..68cc74e 100644 --- a/test/Transforms/LoopVectorize/lcssa-crash.ll +++ b/test/Transforms/LoopVectorize/lcssa-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll index 4f6f3b8..ba36cc4 100644 --- a/test/Transforms/LoopVectorize/lifetime.ll +++ b/test/Transforms/LoopVectorize/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll index 21cb703..f857e80 100644 --- a/test/Transforms/LoopVectorize/memdep.ll +++ b/test/Transforms/LoopVectorize/memdep.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll index 2fcc53a..848f1f9 100644 --- a/test/Transforms/LoopVectorize/metadata-unroll.ll +++ b/test/Transforms/LoopVectorize/metadata-unroll.ll @@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp { } !0 = metadata !{metadata !0, metadata !1} -!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2} +!1 = metadata !{metadata !"llvm.loop.interleave.count", i32 2} diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll index 87de655..da0c622 100644 --- a/test/Transforms/LoopVectorize/metadata-width.ll +++ b/test/Transforms/LoopVectorize/metadata-width.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/Transforms/LoopVectorize/metadata.ll b/test/Transforms/LoopVectorize/metadata.ll new file mode 100644 index 0000000..14f60b3 --- /dev/null +++ b/test/Transforms/LoopVectorize/metadata.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %conv = fptosi float %0 to i32 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +; CHECK-LABEL: @test1 +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]] +; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]] +; CHECK: ret i32 0 + +; CHECK-DAG: ![[TFLT]] = metadata !{metadata ![[TFLT1:[0-9]+]] +; CHECK-DAG: ![[TFLT1]] = metadata !{metadata !"float" + +; CHECK-DAG: ![[TINT]] = metadata !{metadata ![[TINT1:[0-9]+]] +; CHECK-DAG: ![[TINT1]] = metadata !{metadata !"int" + +attributes #0 = { nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"float", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +!4 = metadata !{metadata !5, metadata !5, i64 0} +!5 = metadata !{metadata !"int", metadata !2, i64 0} + diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll index 0e47260..e73e69d 100644 --- a/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll index 88a29c5..cd022ad 100644 --- a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll +++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 7d836de..bb2af1e 100644 --- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; From a simple program with two address spaces: ; char Y[4*10000] __attribute__((address_space(1))); diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll new file mode 100644 index 0000000..a39b44f --- /dev/null +++ b/test/Transforms/LoopVectorize/no_array_bounds.ll @@ -0,0 +1,101 @@ +; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s + +; Verify warning is generated when vectorization/ interleaving is explicitly specified and fails to occur. +; CHECK: warning: no_array_bounds.cpp:5:5: loop not vectorized: failed explicitly specified loop vectorization +; CHECK: warning: no_array_bounds.cpp:10:5: loop not interleaved: failed explicitly specified loop interleaving + +; #pragma clang loop vectorize(enable) +; for (int i = 0; i < number; i++) { +; A[B[i]]++; +; } + +; #pragma clang loop vectorize(disable) interleave(enable) +; for (int i = 0; i < number; i++) { +; B[A[i]]++; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind ssp uwtable +define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 { +entry: + %cmp25 = icmp sgt i32 %number, 0, !dbg !10 + br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !14 + +for.cond5.preheader: ; preds = %for.body + br i1 %cmp25, label %for.body7.preheader, label %for.end15, !dbg !16, !llvm.loop !18 + +for.body7.preheader: ; preds = %for.cond5.preheader + br label %for.body7, !dbg !20 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv27, !dbg !14 + %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !22 + %idxprom1 = sext i32 %0 to i64, !dbg !14 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !14 + %1 = load i32* %arrayidx2, align 4, !dbg !14, !tbaa !22 + %inc = add nsw i32 %1, 1, !dbg !14 + store i32 %inc, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22 + %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !10 + %lftr.wideiv29 = trunc i64 %indvars.iv.next28 to i32, !dbg !10 + %exitcond30 = icmp eq i32 %lftr.wideiv29, %number, !dbg !10 + br i1 %exitcond30, label %for.cond5.preheader, label %for.body, !dbg !10, !llvm.loop !12 + +for.body7: ; preds = %for.body7.preheader, %for.body7 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.body7.preheader ] + %arrayidx9 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !20 + %2 = load i32* %arrayidx9, align 4, !dbg !20, !tbaa !22 + %idxprom10 = sext i32 %2 to i64, !dbg !20 + %arrayidx11 = getelementptr inbounds i32* %B, i64 %idxprom10, !dbg !20 + %3 = load i32* %arrayidx11, align 4, !dbg !20, !tbaa !22 + %inc12 = add nsw i32 %3, 1, !dbg !20 + store i32 %inc12, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !16 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !16 + %exitcond = icmp eq i32 %lftr.wideiv, %number, !dbg !16 + br i1 %exitcond, label %for.end15.loopexit, label %for.body7, !dbg !16, !llvm.loop !18 + +for.end15.loopexit: ; preds = %for.body7 + br label %for.end15 + +for.end15: ; preds = %for.end15.loopexit, %entry, %for.cond5.preheader + ret void, !dbg !26 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\000\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] +!1 = metadata !{metadata !"no_array_bounds.cpp", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{metadata !"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", metadata !1, metadata !5, metadata !6, null, void (i32*, i32*, i32)* @_Z4testPiS_i, null, null, metadata !2} ; [ DW_TAG_subprogram ] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} +!9 = metadata !{metadata !"clang version 3.5.0"} +!10 = metadata !{i32 4, i32 8, metadata !11, null} +!11 = metadata !{metadata !"0xb\004\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!12 = metadata !{metadata !12, metadata !13} +!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +!14 = metadata !{i32 5, i32 5, metadata !15, null} +!15 = metadata !{metadata !"0xb\004\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 9, i32 8, metadata !17, null} +!17 = metadata !{metadata !"0xb\009\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !18, metadata !13, metadata !19} +!19 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} +!20 = metadata !{i32 10, i32 5, metadata !21, null} +!21 = metadata !{metadata !"0xb\009\0036\000", metadata !1, metadata !17} ; [ DW_TAG_lexical_block ] +!22 = metadata !{metadata !23, metadata !23, i64 0} +!23 = metadata !{metadata !"int", metadata !24, i64 0} +!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0} +!25 = metadata !{metadata !"Simple C/C++ TBAA"} +!26 = metadata !{i32 12, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll index 295fcab..5c721a680 100644 --- a/test/Transforms/LoopVectorize/no_idiv_reduction.ll +++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s @a = common global [128 x i32] zeroinitializer, align 16 ;; Must not vectorize division reduction. Division is lossy. diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll index e572d1a..1275915 100644 --- a/test/Transforms/LoopVectorize/no_int_induction.ll +++ b/test/Transforms/LoopVectorize/no_int_induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; int __attribute__((noinline)) sum_array(int *A, int n) { ; return std::accumulate(A, A + n, 0); diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll index 1f891ad..bcd29c1 100644 --- a/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/test/Transforms/LoopVectorize/no_outside_user.ll @@ -1,4 +1,7 @@ -; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s + +; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable +; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll index 52b4285..c989c6b 100644 --- a/test/Transforms/LoopVectorize/no_switch.ll +++ b/test/Transforms/LoopVectorize/no_switch.ll @@ -1,7 +1,8 @@ ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement -; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4 +; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4) +; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization ; CHECK: _Z11test_switchPii ; CHECK-NOT: x i32> @@ -58,28 +59,28 @@ attributes #0 = { nounwind } !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!0 = metadata !{metadata !"0x11\004\00clang version 3.5.0\001\00\006\00\002", metadata !1, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !"source.cpp", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} -!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1} -!5 = metadata !{i32 786473, metadata !1} -!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!4 = metadata !{metadata !"0x2e\00test_switch\00test_switch\00\001\000\001\000\006\00256\001\001", metadata !1, metadata !5, metadata !6, null, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test_switch] +!5 = metadata !{metadata !"0x29", metadata !1} ; [ DW_TAG_file_type ] [./source.cpp] +!6 = metadata !{metadata !"0x15\00\000\000\000\000\000\000", i32 0, null, null, metadata !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} -!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 2} !9 = metadata !{metadata !"clang version 3.5.0"} !10 = metadata !{i32 3, i32 8, metadata !11, null} -!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!11 = metadata !{metadata !"0xb\003\003\000", metadata !1, metadata !4} ; [ DW_TAG_lexical_block ] !12 = metadata !{metadata !12, metadata !13, metadata !13} !13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} !14 = metadata !{i32 4, i32 5, metadata !15, null} -!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1} +!15 = metadata !{metadata !"0xb\003\0036\000", metadata !1, metadata !11} ; [ DW_TAG_lexical_block ] !16 = metadata !{metadata !17, metadata !17, i64 0} !17 = metadata !{metadata !"int", metadata !18, i64 0} !18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0} !19 = metadata !{metadata !"Simple C/C++ TBAA"} !20 = metadata !{i32 6, i32 7, metadata !21, null} -!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2} +!21 = metadata !{metadata !"0xb\004\0018\000", metadata !1, metadata !15} ; [ DW_TAG_lexical_block ] !22 = metadata !{i32 7, i32 5, metadata !21, null} !23 = metadata !{i32 9, i32 7, metadata !21, null} !24 = metadata !{i32 14, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll index c3c81b6..e9f4c5f 100644 --- a/test/Transforms/LoopVectorize/nofloat.ll +++ b/test/Transforms/LoopVectorize/nofloat.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes. diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll index 0c54a2b..b03d4f0 100644 --- a/test/Transforms/LoopVectorize/non-const-n.ll +++ b/test/Transforms/LoopVectorize/non-const-n.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll index e5fad14..68d9933 100644 --- a/test/Transforms/LoopVectorize/nsw-crash.ll +++ b/test/Transforms/LoopVectorize/nsw-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll index 27030a2..a9be80f 100644 --- a/test/Transforms/LoopVectorize/opt.ll +++ b/test/Transforms/LoopVectorize/opt.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s -; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s +; RUN: opt -S -O3 -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC %s +; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll index 15983f0..3fb38fe 100644 --- a/test/Transforms/LoopVectorize/ptr_loops.ll +++ b/test/Transforms/LoopVectorize/ptr_loops.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll index fc8f0a5..2f7a96a 100644 --- a/test/Transforms/LoopVectorize/read-only.ll +++ b/test/Transforms/LoopVectorize/read-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll index 791fce1..5e6b7fa 100644 --- a/test/Transforms/LoopVectorize/reduction.ll +++ b/test/Transforms/LoopVectorize/reduction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll index 65ef95d..da02d01 100644 --- a/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/test/Transforms/LoopVectorize/reverse_induction.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll index f803120..13172bb 100644 --- a/test/Transforms/LoopVectorize/reverse_iter.ll +++ b/test/Transforms/LoopVectorize/reverse_iter.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll index 6c86561..34bbe52 100644 --- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll +++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s ; Check vectorization that would ordinarily require a runtime bounds ; check on the pointers when mixing address spaces. For now we cannot diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll index 212b37c..56f1f99 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s ; Artificial datalayout target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index 01e28bc..9d02a6a 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: br ;CHECK: br ;CHECK: getelementptr -;CHECK-NEXT: getelementptr +;CHECK-DAG: getelementptr ;CHECK-DAG: icmp uge ;CHECK-DAG: icmp uge ;CHECK-DAG: icmp uge diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll index d15479d..1edafb4 100644 --- a/test/Transforms/LoopVectorize/runtime-check.ll +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll index 7370a6f..324949d 100644 --- a/test/Transforms/LoopVectorize/runtime-limit.ll +++ b/test/Transforms/LoopVectorize/runtime-limit.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll index c950860..f853afd 100644 --- a/test/Transforms/LoopVectorize/safegep.ll +++ b/test/Transforms/LoopVectorize/safegep.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll index d623a34..d19458f 100644 --- a/test/Transforms/LoopVectorize/same-base-access.ll +++ b/test/Transforms/LoopVectorize/same-base-access.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll index 257c7be..6b37cc2 100644 --- a/test/Transforms/LoopVectorize/scalar-select.ll +++ b/test/Transforms/LoopVectorize/scalar-select.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll index 683621a..1bce3f8 100644 --- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll +++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx" diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll index 83f35ff..8bf680a 100644 --- a/test/Transforms/LoopVectorize/simple-unroll.ll +++ b/test/Transforms/LoopVectorize/simple-unroll.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll index 49ce5c5..1d30102 100644 --- a/test/Transforms/LoopVectorize/small-loop.ll +++ b/test/Transforms/LoopVectorize/small-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll index 8f675af..cc47494 100644 --- a/test/Transforms/LoopVectorize/start-non-zero.ll +++ b/test/Transforms/LoopVectorize/start-non-zero.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll index e53c120..6d3d113 100644 --- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll +++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index 75beae8..cf6f325 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/test/Transforms/LoopVectorize/tbaa-nodep.ll b/test/Transforms/LoopVectorize/tbaa-nodep.ll new file mode 100644 index 0000000..5cd104c --- /dev/null +++ b/test/Transforms/LoopVectorize/tbaa-nodep.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define i32 @test1(i32* nocapture %a, float* nocapture readonly %b) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %conv = fptosi float %0 to i32 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 + +; TBAA partitions the accesses in this loop, so it can be vectorized without +; runtime checks. + +; CHECK-LABEL: @test1 +; CHECK: entry: +; CHECK-NEXT: br label %vector.body +; CHECK: vector.body: + +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa + +; CHECK: ret i32 0 + +; CHECK-NOTBAA-LABEL: @test1 +; CHECK-NOTBAA: icmp uge i32* + +; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa + +; CHECK-NOTBAA: ret i32 0 +} + +; Function Attrs: nounwind uwtable +define i32 @test2(i32* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !tbaa !0 + %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4, !tbaa !4 + %conv = sitofp i32 %1 to float + %mul = fmul float %0, %conv + %arrayidx4 = getelementptr inbounds float* %c, i64 %indvars.iv + store float %mul, float* %arrayidx4, align 4, !tbaa !0 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 + +; This test is like the first, except here there is still one runtime check +; required. Without TBAA, however, two checks are required. + +; CHECK-LABEL: @test2 +; CHECK: icmp uge float* +; CHECK: icmp uge float* +; CHECK-NOT: icmp uge i32* + +; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa + +; CHECK: ret i32 0 + +; CHECK-NOTBAA-LABEL: @test2 +; CHECK-NOTBAA: icmp uge float* +; CHECK-NOTBAA: icmp uge float* +; CHECK-NOTBAA-DAG: icmp uge float* +; CHECK-NOTBAA-DAG: icmp uge i32* + +; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa +; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa + +; CHECK-NOTBAA: ret i32 0 +} + +attributes #0 = { nounwind uwtable } + +!0 = metadata !{metadata !1, metadata !1, i64 0} +!1 = metadata !{metadata !"float", metadata !2, i64 0} +!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0} +!3 = metadata !{metadata !"Simple C/C++ TBAA"} +!4 = metadata !{metadata !5, metadata !5, i64 0} +!5 = metadata !{metadata !"int", metadata !2, i64 0} + diff --git a/test/Transforms/LoopVectorize/undef-inst-bug.ll b/test/Transforms/LoopVectorize/undef-inst-bug.ll index ed60e80..0444fe8 100644 --- a/test/Transforms/LoopVectorize/undef-inst-bug.ll +++ b/test/Transforms/LoopVectorize/undef-inst-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll index 89f4678..257b4e6 100644 --- a/test/Transforms/LoopVectorize/unroll_novec.ll +++ b/test/Transforms/LoopVectorize/unroll_novec.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/test/Transforms/LoopVectorize/unsized-pointee-crash.ll b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll new file mode 100644 index 0000000..5cc9837 --- /dev/null +++ b/test/Transforms/LoopVectorize/unsized-pointee-crash.ll @@ -0,0 +1,24 @@ +; RUN: opt -S -loop-vectorize < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: @fn1 +define void @fn1() { +entry: + br label %for.body + +for.body: + %b.05 = phi i32 (...)* [ undef, %entry ], [ %1, %for.body ] + %a.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = bitcast i32 (...)* %b.05 to i8* + %add.ptr = getelementptr i8* %0, i64 1 + %1 = bitcast i8* %add.ptr to i32 (...)* +; CHECK: %[[cst:.*]] = bitcast i32 (...)* {{.*}} to i8* +; CHECK-NEXT: %[[gep:.*]] = getelementptr i8* %[[cst]], i64 1 + %inc = add nsw i32 %a.04, 1 + %exitcond = icmp eq i32 %a.04, 63 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll index 6b06afa..7fb9095 100644 --- a/test/Transforms/LoopVectorize/value-ptr-bug.ll +++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll index f646567..b0fe7a5 100644 --- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll +++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; Loop from "rotated" diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll index 92ec24f..556da45 100644 --- a/test/Transforms/LoopVectorize/vect.stats.ll +++ b/test/Transforms/LoopVectorize/vect.stats.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts ; @@ -62,4 +62,4 @@ for.body: for.end: ret void -}
\ No newline at end of file +} diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index 47de13d..cee4b16 100644 --- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -70,7 +70,7 @@ attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" " ; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2} ; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} -; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +; CHECK: !2 = metadata !{metadata !"llvm.loop.interleave.count", i32 1} ; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2} !0 = metadata !{metadata !0, metadata !1} diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll index 51d20e2..7ac2fca 100644 --- a/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/test/Transforms/LoopVectorize/version-mem-access.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s +; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll index 71a9cd0..2f100de 100644 --- a/test/Transforms/LoopVectorize/write-only.ll +++ b/test/Transforms/LoopVectorize/write-only.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" |