diff options
author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700 |
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch) | |
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /test/Transforms/LoopVectorize/X86 | |
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff) | |
download | external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2 |
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'test/Transforms/LoopVectorize/X86')
4 files changed, 243 insertions, 10 deletions
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll index 224823b..9e4e989 100644 --- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1,13 +1,13 @@ -; RUN: opt < %s -mcpu=corei7 -O1 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1 -; RUN: opt < %s -mcpu=corei7 -O2 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O2 -; RUN: opt < %s -mcpu=corei7 -O3 -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3 -; RUN: opt < %s -mcpu=corei7 -Os -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Os -; RUN: opt < %s -mcpu=corei7 -Oz -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=Oz -; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC -; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC -; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O1VEC2 -; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=OzVEC2 -; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -x86-use-partial-unrolling=0 | FileCheck %s --check-prefix=O3DIS +; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1 +; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2 +; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3 +; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os +; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz +; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC +; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC +; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2 +; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2 +; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS ; This file tests the llvm.vectorizer.pragma forcing vectorization even when ; optimization levels are too low, or when vectorization is disabled. diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll new file mode 100644 index 0000000..84ffb27 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK: LV: Loop hints: force=enabled +; CHECK: LV: Loop hints: force=? +; No more loops in the module +; CHECK-NOT: LV: Loop hints: force= +; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 1 loop-vectorize - Number of loops vectorized + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; +; The source code for the test: +; +; #include <math.h> +; void foo(float* restrict A, float * restrict B, int size) +; { +; for (int i = 0; i < size; ++i) A[i] = sinf(B[i]); +; } +; + +; +; This loop will be vectorized, although the scalar cost is lower than any of vector costs, but vectorization is explicitly forced in metadata. +; + +define void @vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) { +entry: + %cmp6 = icmp sgt i32 %size, 0 + br i1 %cmp6, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 + %call = tail call float @llvm.sin.f32(float %0) + %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %size + br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !1 + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +!1 = metadata !{metadata !1, metadata !2} +!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} + +; +; This method will not be vectorized, as scalar cost is lower than any of vector costs. +; + +define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture %B, i32 %size) { +entry: + %cmp6 = icmp sgt i32 %size, 0 + br i1 %cmp6, label %for.body.preheader, label %for.end + +for.body.preheader: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %call = tail call float @llvm.sin.f32(float %0) + %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %size + br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !3 + +for.end.loopexit: + br label %for.end + +for.end: + ret void +} + +declare float @llvm.sin.f32(float) nounwind readnone + +; Dummy metadata +!3 = metadata !{metadata !3} + diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll new file mode 100644 index 0000000..1b979e5 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -0,0 +1,73 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S -vectorizer-min-trip-count=21 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK: LV: Loop hints: force=enabled +; CHECK: LV: Loop hints: force=? +; No more loops in the module +; CHECK-NOT: LV: Loop hints: force= +; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 1 loop-vectorize - Number of loops vectorized + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; +; The source code for the test: +; +; void foo(float* restrict A, float* restrict B) +; { +; for (int i = 0; i < 20; ++i) A[i] += B[i]; +; } +; + +; +; This loop will be vectorized, although the trip count is below the threshold, but vectorization is explicitly forced in metadata. +; +define void @vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1 + %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 + %add = fadd fast float %0, %1 + store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 20 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!1 = metadata !{metadata !1, metadata !2} +!2 = metadata !{metadata !"llvm.vectorizer.enable", i1 true} + +; +; This loop will not be vectorized as the trip count is below the threshold. +; +define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv + %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 + %arrayidx2 = getelementptr inbounds float* %A, i64 %indvars.iv + %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %add = fadd fast float %0, %1 + store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 20 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3 + +for.end: + ret void +} + +!3 = metadata !{metadata !3} + diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll new file mode 100644 index 0000000..685d034 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='.*vectorize.*' 2>&1 | FileCheck -check-prefix=UNROLLED %s + +; VECTORIZED: remark: {{.*}}.c:17:8: vectorized loop (vectorization factor: 4, unrolling interleave factor: 1) +; UNROLLED: remark: {{.*}}.c:17:8: unrolled with interleaving factor 4 (vectorization not beneficial) + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @foo(i32 %n) #0 { +entry: + %diff = alloca i32, align 4 + %cb = alloca [16 x i8], align 16 + %cc = alloca [16 x i8], align 16 + store i32 0, i32* %diff, align 4, !dbg !10, !tbaa !11 + br label %for.body, !dbg !15 + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19 + %arrayidx = getelementptr inbounds [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19 + %0 = load i8* %arrayidx, align 1, !dbg !19, !tbaa !21 + %conv = sext i8 %0 to i32, !dbg !19 + %arrayidx2 = getelementptr inbounds [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19 + %1 = load i8* %arrayidx2, align 1, !dbg !19, !tbaa !21 + %conv3 = sext i8 %1 to i32, !dbg !19 + %sub = sub i32 %conv, %conv3, !dbg !19 + %add = add nsw i32 %sub, %add8, !dbg !19 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !15 + %exitcond = icmp eq i64 %indvars.iv.next, 16, !dbg !15 + br i1 %exitcond, label %for.end, label %for.body, !dbg !15 + +for.end: ; preds = %for.body + store i32 %add, i32* %diff, align 4, !dbg !19, !tbaa !11 + call void @ibar(i32* %diff) #2, !dbg !22 + ret i32 0, !dbg !23 +} + +declare void @ibar(i32*) #1 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [./vectorization-remarks.c] [DW_LANG_C99] +!1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./vectorization-remarks.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.5.0 "} +!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ] +!11 = metadata !{metadata !12, metadata !12, i64 0} +!12 = metadata !{metadata !"int", metadata !13, i64 0} +!13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0} +!14 = metadata !{metadata !"Simple C/C++ TBAA"} +!15 = metadata !{i32 17, i32 8, metadata !16, null} +!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!19 = metadata !{i32 18, i32 5, metadata !20, null} +!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c] +!21 = metadata !{metadata !13, metadata !13, i64 0} +!22 = metadata !{i32 20, i32 3, metadata !4, null} +!23 = metadata !{i32 21, i32 3, metadata !4, null} |