diff options
Diffstat (limited to 'test/Transforms/LoopVectorize')
22 files changed, 558 insertions, 33 deletions
diff --git a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg index f1d1f88..937cffb 100644 --- a/test/Transforms/LoopVectorize/AArch64/lit.local.cfg +++ b/test/Transforms/LoopVectorize/AArch64/lit.local.cfg @@ -1,6 +1,5 @@ config.suffixes = ['.ll'] -targets = set(config.root.targets_to_build.split()) -if not 'ARM64' in targets: +if not 'AArch64' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/ARM/lit.local.cfg b/test/Transforms/LoopVectorize/ARM/lit.local.cfg index 8a3ba96..98c6700 100644 --- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg +++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg @@ -1,4 +1,3 @@ -targets = set(config.root.targets_to_build.split()) -if not 'ARM' in targets: +if not 'ARM' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg index 2e46300..5d33887 100644 --- a/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg +++ b/test/Transforms/LoopVectorize/PowerPC/lit.local.cfg @@ -1,4 +1,3 @@ -targets = set(config.root.targets_to_build.split()) -if not 'PowerPC' in targets: +if not 'PowerPC' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll index faed77d..fce3b70 100644 --- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -40,7 +40,7 @@ for.end: ; preds = %for.body ; Now, we check for the Hint metadata ; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]} -; CHECK: [[width]] = metadata !{metadata !"llvm.vectorizer.width", i32 1} -; CHECK: [[unroll]] = metadata !{metadata !"llvm.vectorizer.unroll", i32 1} +; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} 
+; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} ; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]} diff --git a/test/Transforms/LoopVectorize/X86/avx512.ll b/test/Transforms/LoopVectorize/X86/avx512.ll new file mode 100644 index 0000000..a220866 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/avx512.ll @@ -0,0 +1,35 @@ +; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Verify that we generate 512-bit wide vectors for a basic integer memset +; loop. + +; CHECK-LABEL: f: +; CHECK: vmovdqu32 %zmm{{.}}, ( +; CHECK-NOT: %ymm + +define void @f(i32* %a, i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + store i32 %n, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg index ba763cf..e71f3cc 100644 --- a/test/Transforms/LoopVectorize/X86/lit.local.cfg +++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg @@ -1,4 +1,3 @@ -targets = set(config.root.targets_to_build.split()) -if not 'X86' in targets: +if not 'X86' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll 
b/test/Transforms/LoopVectorize/X86/metadata-enable.ll index 9e4e989..8e0ca41 100644 --- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -9,8 +9,9 @@ ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2 ; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS -; This file tests the llvm.vectorizer.pragma forcing vectorization even when -; optimization levels are too low, or when vectorization is disabled. +; This file tests the llvm.loop.vectorize.enable metadata forcing +; vectorization even when optimization levels are too low, or when +; vectorization is disabled. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -170,6 +171,6 @@ for.end: ; preds = %for.body } !0 = metadata !{metadata !0, metadata !1} -!1 = metadata !{metadata !"llvm.vectorizer.enable", i1 1} +!1 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 1} !2 = metadata !{metadata !2, metadata !3} -!3 = metadata !{metadata !"llvm.vectorizer.enable", i1 0} +!3 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 0} diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll index 84ffb27..074313b 100644 --- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll @@ -53,7 +53,7 @@ for.end: } !1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} +!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} ; ; This method will not be vectorized, as scalar cost is lower than any of vector costs. 
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 1b979e5..97c31a1 100644 --- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -44,7 +44,7 @@ for.end: } !1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} +!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} ; ; This loop will not be vectorized as the trip count is below the threshold. diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll new file mode 100644 index 0000000..6cdd29b --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -0,0 +1,160 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; C/C++ code for tests +; void test(int *A, int Length) { +; #pragma clang loop vectorize(enable) interleave(enable) +; for (int i = 0; i < Length; i++) { +; A[i] = i; +; if (A[i] > Length) +; break; +; } +; } + +; void test_disabled(int *A, int Length) { +; #pragma clang loop vectorize(disable) interleave(disable) +; for (int i = 0; i < Length; i++) +; A[i] = i; +; } + +; void test_array_bounds(int *A, int *B, int Length) { +; #pragma clang loop vectorize(enable) +; for (int i = 0; i < Length; i++) +; A[i] = A[B[i]]; +; } + +; File, line, and column should match those specified in the metadata +; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations +; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified +; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1 +; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds 
+; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled + +; CHECK: _Z4testPii +; CHECK-NOT: x i32> +; CHECK: ret + +; CHECK: _Z13test_disabledPii +; CHECK-NOT: x i32> +; CHECK: ret + +; CHECK: _Z17test_array_boundsPiS_i +; CHECK-NOT: x i32> +; CHECK: ret + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind optsize ssp uwtable +define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 { +entry: + %cmp10 = icmp sgt i32 %Length, 0, !dbg !12 + br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !16 + %0 = trunc i64 %indvars.iv to i32, !dbg !16 + store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18 + %cmp3 = icmp sle i32 %0, %Length, !dbg !22 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12 + %1 = trunc i64 %indvars.iv.next to i32 + %cmp = icmp slt i32 %1, %Length, !dbg !12 + %or.cond = and i1 %cmp3, %cmp, !dbg !22 + br i1 %or.cond, label %for.body, label %for.end, !dbg !22 + +for.end: ; preds = %for.body, %entry + ret void, !dbg !24 +} + +; Function Attrs: nounwind optsize ssp uwtable +define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 { +entry: + %cmp4 = icmp sgt i32 %Length, 0, !dbg !25 + br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !30 + %0 = trunc i64 %indvars.iv to i32, !dbg !30 + store i32 %0, i32* %arrayidx, align 4, !dbg !30, !tbaa !18 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !25 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !25 + %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !25 + br i1 %exitcond, label %for.end, 
label %for.body, !dbg !25, !llvm.loop !27 + +for.end: ; preds = %for.body, %entry + ret void, !dbg !31 +} + +; Function Attrs: nounwind optsize ssp uwtable +define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 { +entry: + %cmp9 = icmp sgt i32 %Length, 0, !dbg !32 + br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !35 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv, !dbg !35 + %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18 + %idxprom1 = sext i32 %0 to i64, !dbg !35 + %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1, !dbg !35 + %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18 + %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !35 + store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !32 + %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !32 + br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !32, !llvm.loop !34 + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void, !dbg !36 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!1 = metadata !{metadata !"source.cpp", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4, metadata !7, metadata !8} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, 
metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1} +!5 = metadata !{i32 786473, metadata !1} +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10} +!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16} +!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!11 = metadata !{metadata !"clang version 3.5.0"} +!12 = metadata !{i32 3, i32 8, metadata !13, null} +!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!14 = metadata !{metadata !14, metadata !15, metadata !15} +!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +!16 = metadata !{i32 4, i32 5, metadata !17, null} +!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1} +!18 = metadata !{metadata !19, metadata !19, i64 0} +!19 = metadata !{metadata !"int", metadata !20, i64 0} +!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0} +!21 = metadata !{metadata !"Simple C/C++ TBAA"} +!22 = metadata !{i32 5, i32 9, metadata !23, null} +!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2} +!24 = metadata !{i32 8, i32 1, metadata !4, null} +!25 = metadata !{i32 12, i32 8, metadata !26, null} +!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, 
i32 3} +!27 = metadata !{metadata !27, metadata !28, metadata !29} +!28 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} +!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} +!30 = metadata !{i32 13, i32 5, metadata !26, null} +!31 = metadata !{i32 14, i32 1, metadata !7, null} +!32 = metadata !{i32 18, i32 8, metadata !33, null} +!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4} +!34 = metadata !{metadata !34, metadata !15} +!35 = metadata !{i32 19, i32 5, metadata !33, null} +!36 = metadata !{i32 20, i32 1, metadata !8, null} diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll index 685d034..f683447 100644 --- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -1,8 +1,17 @@ ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s -; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1) -; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial) +; This code has all the !dbg annotations needed to track source line information, +; but is missing the llvm.dbg.cu annotation. 
This prevents code generation from +; emitting debug info in the final output. +; RUN: llc -mtriple x86_64-pc-linux-gnu %s -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s +; DEBUG-OUTPUT-NOT: .loc +; DEBUG-OUTPUT-NOT: {{.*}}.debug_info + +; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1) +; UNROLLED: remark: vectorization-remarks.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial) +; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -37,11 +46,9 @@ for.end: ; preds = %for.body declare void @ibar(i32*) #1 -!llvm.dbg.cu = !{!0} !llvm.module.flags = !{!7, !8} !llvm.ident = !{!9} -!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [./vectorization-remarks.c] [DW_LANG_C99] !1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."} !2 = metadata !{} !3 = metadata !{metadata !4} diff --git a/test/Transforms/LoopVectorize/XCore/lit.local.cfg b/test/Transforms/LoopVectorize/XCore/lit.local.cfg index 4d17d46..bb48713 100644 --- a/test/Transforms/LoopVectorize/XCore/lit.local.cfg +++ b/test/Transforms/LoopVectorize/XCore/lit.local.cfg @@ -1,3 +1,2 @@ -targets = set(config.root.targets_to_build.split()) -if not 'XCore' in targets: +if not 'XCore' in config.root.targets: config.unsupported = True diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll new file mode 100644 index 0000000..e4ba77f --- /dev/null +++ b/test/Transforms/LoopVectorize/control-flow.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | 
FileCheck %s + +; C/C++ code for control flow test +; int test(int *A, int Length) { +; for (int i = 0; i < Length; i++) { +; if (A[i] > 10.0) goto end; +; A[i] = 0; +; } +; end: +; return 0; +; } + +; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer +; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified + +; CHECK: _Z4testPii +; CHECK-NOT: x i32> +; CHECK: ret + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind optsize ssp uwtable +define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 { +entry: + %cmp8 = icmp sgt i32 %Length, 0, !dbg !10 + br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !12 + +for.body: ; preds = %for.body.preheader, %if.else + %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !12 + %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15 + %cmp1 = icmp sgt i32 %0, 10, !dbg !12 + br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12 + +if.else: ; preds = %for.body + store i32 0, i32* %arrayidx, align 4, !dbg !19, !tbaa !15 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %1 = trunc i64 %indvars.iv.next to i32, !dbg !10 + %cmp = icmp slt i32 %1, %Length, !dbg !10 + br i1 %cmp, label %for.body, label %end.loopexit, !dbg !10 + +end.loopexit: ; preds = %if.else, %for.body + br label %end + +end: ; preds = %end.loopexit, %entry + ret i32 0, !dbg !20 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!1 = metadata !{metadata !"source.cpp", metadata !"."} +!2 = metadata !{} 
+!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2} +!5 = metadata !{i32 786473, metadata !1} +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5.0"} +!10 = metadata !{i32 3, i32 8, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!12 = metadata !{i32 5, i32 9, metadata !13, null} +!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2} +!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1} +!15 = metadata !{metadata !16, metadata !16, i64 0} +!16 = metadata !{metadata !"int", metadata !17, i64 0} +!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0} +!18 = metadata !{metadata !"Simple C/C++ TBAA"} +!19 = metadata !{i32 8, i32 7, metadata !13, null} +!20 = metadata !{i32 12, i32 3, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll index dbe0243..6e3e8ed 100644 --- a/test/Transforms/LoopVectorize/if-conversion.ll +++ b/test/Transforms/LoopVectorize/if-conversion.ll @@ -156,7 +156,7 @@ for.body: br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end cond.false: - %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)) + %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to 
i32)) br label %cond.end cond.end: diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll index ad2c663..7dabcb2 100644 --- a/test/Transforms/LoopVectorize/induction.ll +++ b/test/Transforms/LoopVectorize/induction.ll @@ -108,3 +108,64 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; <label>:5 ; preds = %1 ret i32 %2 } + +; This loop has a backedge taken count of i32_max. We need to check for this +; condition and branch directly to the scalar loop. + +; CHECK-LABEL: max_i32_backedgetaken +; CHECK: %backedge.overflow = icmp eq i32 -1, -1 +; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked + +; CHECK: scalar.ph: +; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ] +; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ] + +define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { + + br label %1 + +; <label>:1 ; preds = %1, %0 + %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] + %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ] + %2 = and i32 %a.0, 4 + %3 = add i32 %b.0, -1 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %5, label %1 + +; <label>:5 ; preds = %1 + ret i32 %2 +} + +; When generating the overflow check we must make sure that the induction start value +; is defined before the branch to the scalar preheader. 
+ +; CHECK-LABEL: testoverflowcheck +; CHECK: entry +; CHECK: %[[LOAD:.*]] = load i8 +; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32 +; CHECK: br + +; CHECK: scalar.ph +; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ] + +@e = global i8 1, align 1 +@d = common global i32 0, align 4 +@c = common global i32 0, align 4 +define i32 @testoverflowcheck() { +entry: + %.pr.i = load i8* @e, align 1 + %0 = load i32* @d, align 4 + %c.promoted.i = load i32* @c, align 4 + br label %cond.end.i + +cond.end.i: + %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ] + %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ] + %and.i = and i32 %0, %and3.i + %inc.i = add i8 %inc4.i, 1 + %tobool.i = icmp eq i8 %inc.i, 0 + br i1 %tobool.i, label %loopexit, label %cond.end.i + +loopexit: + ret i32 %and.i +} diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index c3d570c..7dfaf03 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -1090,3 +1090,105 @@ for.end: ; preds = %for.body ret void } +declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone + +;CHECK-LABEL: @powi_f64( +;CHECK: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 
%n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @powi_f64_neg( +;CHECK-NOT: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %1 = trunc i64 %indvars.iv to i32 + %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @cttz_f64( +;CHECK: llvm.cttz.v4i64 +;CHECK: ret void +define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare 
i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @ctlz_f64( +;CHECK: llvm.ctlz.v4i64 +;CHECK: ret void +define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll index 7f10372..2fcc53a 100644 --- a/test/Transforms/LoopVectorize/metadata-unroll.ll +++ b/test/Transforms/LoopVectorize/metadata-unroll.ll @@ -38,4 +38,4 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp { } !0 = metadata !{metadata !0, metadata !1} -!1 = metadata !{metadata !"llvm.vectorizer.unroll", i32 2} +!1 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 2} diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll index 1960c0b..87de655 100644 --- a/test/Transforms/LoopVectorize/metadata-width.ll +++ b/test/Transforms/LoopVectorize/metadata-width.ll @@ -28,4 +28,4 @@ for.end: ; preds = %for.body, %entry attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } !0 = metadata !{metadata !0, metadata 
!1} -!1 = metadata !{metadata !"llvm.vectorizer.width", i32 8} +!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8} diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll new file mode 100644 index 0000000..52b4285 --- /dev/null +++ b/test/Transforms/LoopVectorize/no_switch.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement +; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4 + +; CHECK: _Z11test_switchPii +; CHECK-NOT: x i32> +; CHECK: ret + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind optsize ssp uwtable +define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 { +entry: + %cmp18 = icmp sgt i32 %Length, 0, !dbg !10 + br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !14 + +for.body: ; preds = %for.body.preheader, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv, !dbg !14 + %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16 + switch i32 %0, label %for.inc [ + i32 0, label %sw.bb + i32 1, label %sw.bb3 + ], !dbg !14 + +sw.bb: ; preds = %for.body + %1 = trunc i64 %indvars.iv to i32, !dbg !20 + %mul = shl nsw i32 %1, 1, !dbg !20 + br label %for.inc, !dbg !22 + +sw.bb3: ; preds = %for.body + %2 = trunc i64 %indvars.iv to i32, !dbg !23 + store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16 + br label %for.inc, !dbg !23 + +for.inc: ; preds = %sw.bb3, %for.body, %sw.bb + %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ] + store i32 %storemerge, i32* %arrayidx, align 
4, !dbg !20, !tbaa !16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10 + %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10 + br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12 + +for.end.loopexit: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void, !dbg !24 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} +!1 = metadata !{metadata !"source.cpp", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1} +!5 = metadata !{i32 786473, metadata !1} +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5.0"} +!10 = metadata !{i32 3, i32 8, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0} +!12 = metadata !{metadata !12, metadata !13, metadata !13} +!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} +!14 = metadata !{i32 4, i32 5, metadata !15, null} +!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1} +!16 = metadata !{metadata !17, metadata !17, i64 0} +!17 = metadata !{metadata !"int", metadata !18, i64 0} +!18 = metadata !{metadata 
!"omnipotent char", metadata !19, i64 0} +!19 = metadata !{metadata !"Simple C/C++ TBAA"} +!20 = metadata !{i32 6, i32 7, metadata !21, null} +!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2} +!22 = metadata !{i32 7, i32 5, metadata !21, null} +!23 = metadata !{i32 9, i32 7, metadata !21, null} +!24 = metadata !{i32 14, i32 1, metadata !4, null} diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index e7b1e2a..01e28bc 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -5,6 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK-LABEL: @add_ints( ;CHECK: br +;CHECK: br ;CHECK: getelementptr ;CHECK-NEXT: getelementptr ;CHECK-DAG: icmp uge diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll index dc3df7a..f646567 100644 --- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll +++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll @@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu" ; ; Test #1 ; -; Ensure that "llvm.vectorizer.enable" metadata was not lost prior to LoopVectorize pass. +; Ensure that "llvm.loop.vectorize.enable" metadata was not lost prior to LoopVectorize pass. ; In past LoopRotate was clearing that metadata. ; ; The source C code is: @@ -62,12 +62,12 @@ for.end: } !1 = metadata !{metadata !1, metadata !2} -!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} +!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} ; ; Test #2 ; -; Ensure that "llvm.vectorizer.enable" metadata was not lost even +; Ensure that "llvm.loop.vectorize.enable" metadata was not lost even ; if loop was not rotated (see http://reviews.llvm.org/D3348#comment-4). 
; define i32 @nonrotated(i32 %a) { @@ -85,4 +85,4 @@ return: } !3 = metadata !{metadata !3, metadata !4} -!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} +!4 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true} diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index 7800469..47de13d 100644 --- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -69,9 +69,9 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } ; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2} -; CHECK: !1 = metadata !{metadata !"llvm.vectorizer.width", i32 1} -; CHECK: !2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 1} +; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} +; CHECK: !2 = metadata !{metadata !"llvm.loop.vectorize.unroll", i32 1} ; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2} !0 = metadata !{metadata !0, metadata !1} -!1 = metadata !{metadata !"llvm.vectorizer.width", i32 1} +!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1} |