diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-05-15 01:44:30 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-05-15 01:44:30 +0000 |
commit | 1386692ef64d3151da8986589eadf0c58aba5c50 (patch) | |
tree | 0a0d7b40736b61bca369b8658fa8a669659dc97c /test/Transforms | |
parent | a88d974ce274152d2f8f28660ba277906bde2384 (diff) | |
download | external_llvm-1386692ef64d3151da8986589eadf0c58aba5c50.zip external_llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.gz external_llvm-1386692ef64d3151da8986589eadf0c58aba5c50.tar.bz2 |
LoopVectorize: Hoist conditional loads if possible
InstCombine can be uncooperative with vectorization: it may sink loads into
conditional blocks, which prevents vectorization.
Undo this sinking (hoist the load back out of the conditional block) if there are unconditional memory accesses to the same
addresses in the loop.
radar://13815763
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181860 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms')
-rw-r--r-- | test/Transforms/LoopVectorize/hoist-loads.ll | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
new file mode 100644
index 0000000..fad1735
--- /dev/null
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -0,0 +1,69 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+; Make sure we can vectorize in the presence of hoistable conditional loads.
+; CHECK: hoist_cond_load
+; CHECK: load <2 x float>
+
+define void @hoist_cond_load() {
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv ; address of A[i]
+  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv ; address of B[i]
+  %0 = load float* %arrayidx2, align 4 ; unconditional load of B[i]
+  %cmp3 = fcmp oeq float %0, 0.000000e+00
+  br i1 %cmp3, label %if.end9, label %if.else ; B[i] == 0.0 bypasses the load of A[i]
+
+if.else:
+  %1 = load float* %arrayidx, align 4 ; conditional load of A[i], only reached when B[i] != 0.0
+  br label %if.end9
+
+if.end9:
+  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ] ; A[i] if it was loaded, else 0.0
+  store float %tmp.0, float* %arrayidx, align 4 ; unconditional store to A[i], the same address the conditional load reads
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end ; leave the loop once the incremented index equals 1024
+
+for.end:
+  ret void
+}
+
+; However, we can't hoist loads whose address we have not seen unconditionally
+; accessed.
+; CHECK: dont_hoist_cond_load
+; CHECK-NOT: load <2 x float>
+
+define void @dont_hoist_cond_load() {
+entry:
+  br label %for.body
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @A, i64 0, i64 %indvars.iv ; address of A[i]
+  %arrayidx2 = getelementptr inbounds [1024 x float]* @B, i64 0, i64 %indvars.iv ; address of B[i]
+  %0 = load float* %arrayidx2, align 4 ; unconditional load of B[i]
+  %cmp3 = fcmp oeq float %0, 0.000000e+00
+  br i1 %cmp3, label %if.end9, label %if.else ; B[i] == 0.0 bypasses the load of A[i]
+
+if.else:
+  %1 = load float* %arrayidx, align 4 ; the only access to A[i] in this loop, and it is conditional
+  br label %if.end9
+
+if.end9:
+  %tmp.0 = phi float [ %1, %if.else ], [ 0.000000e+00, %for.body ] ; A[i] if it was loaded, else 0.0
+  store float %tmp.0, float* %arrayidx2, align 4 ; stores to B[i], not A[i], so A[i] is never accessed unconditionally
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.body, label %for.end ; leave the loop once the incremented index equals 1024
+
+for.end:
+  ret void
+}  |