diff options
Diffstat (limited to 'test/Transforms/LoopVectorize')
25 files changed, 845 insertions, 186 deletions
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 2dd7fe3..bab6300 100644 --- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -15,7 +15,7 @@ entry: for.body: ; preds = %entry, %if.end %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %tobool = icmp eq i32 %0, 0 br i1 %tobool, label %if.end, label %if.then @@ -29,7 +29,7 @@ if.then: ; preds = %for.body if.end: ; preds = %for.body, %if.then %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ] - store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0 + store i32 %z.0, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %x @@ -38,7 +38,3 @@ if.end: ; preds = %for.body, %if.then for.end: ; preds = %if.end, %entry ret i32 undef } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll index 405582c..ae9f998 100644 --- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll +++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll @@ -24,7 +24,7 @@ entry: %3 = shl nsw i64 %indvars.iv, 2 %4 = getelementptr inbounds i8* %1, i64 %3 %5 = bitcast i8* %4 to float* - store float %value, float* %5, align 4, !tbaa !0 + store float %value, float* %5, align 4 %indvars.iv.next = add i64 %indvars.iv, %2 %6 = trunc i64 %indvars.iv.next to i32 %7 = icmp slt i32 %6, %_n @@ -43,7 +43,7 @@ entry: %0 = shl nsw i64 %indvars.iv, 2 %1 = getelementptr inbounds i8* bitcast (float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 16000) to 
i8*), i64 %0 %2 = bitcast i8* %1 to float* - store float -1.000000e+00, float* %2, align 4, !tbaa !0 + store float -1.000000e+00, float* %2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 16000 @@ -52,6 +52,3 @@ entry: "5": ; preds = %"3" ret i32 0 } - -!0 = metadata !{metadata !"alias set 7: float", metadata !1} -!1 = metadata !{metadata !1} diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll new file mode 100644 index 0000000..f4c07b4 --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll @@ -0,0 +1,30 @@ +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@B = common global [1024 x i32] zeroinitializer, align 16 +@A = common global [1024 x i32] zeroinitializer, align 16 + +; We used to not vectorize this loop because the shift was deemed too expensive. +; Now that we differentiate shift cost based on the operand value kind, we will +; vectorize this loop. 
+; CHECK: ashr <4 x i32> +define void @f() { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %shl = ashr i32 %0, 3 + %arrayidx2 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + store i32 %shl, i32* %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 23d9233..760d28d 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] - %2 = add nsw i64 %indvars.iv, 3 - %3 = trunc i64 %2 to i32 - %4 = sitofp i32 %3 to float - %5 = getelementptr inbounds float* %B, i64 %indvars.iv - store float %4, float* %5, align 4 + %add = add nsw i64 %indvars.iv, 3 + %tofp = sitofp i64 %add to float + %gep = getelementptr inbounds float* %B, i64 %indvars.iv + store float %tofp, float* %gep, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll new file mode 100644 index 0000000..47a5e7a --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine 
-S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK: @foo +;CHECK-NOT: <4 x i32> +;CHECK: ret void + +; Function Attrs: nounwind uwtable +define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { +entry: + %cmp27 = icmp sgt i32 %m, 0 + br i1 %cmp27, label %for.body3.lr.ph.us, label %for.end15 + +for.end.us: ; preds = %for.body3.us + %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33 + %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %add10.us = add nsw i32 %0, 3 + store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next34 = add i64 %indvars.iv33, 1 + %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32 + %exitcond36 = icmp eq i32 %lftr.wideiv35, %m + br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop.parallel !5 + +for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us + %indvars.iv29 = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next30, %for.body3.us ] + %1 = trunc i64 %indvars.iv29 to i32 + %add4.us = add i32 %add.us, %1 + %idxprom.us = sext i32 %add4.us to i64 + %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us + %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3 + %add5.us = add nsw i32 %2, 1 + store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3 + %indvars.iv.next30 = add i64 %indvars.iv29, 1 + %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32 + %exitcond32 = icmp eq i32 %lftr.wideiv31, %m + br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop.parallel !4 + +for.body3.lr.ph.us: ; preds = %for.end.us, %entry + %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ] + %3 = trunc i64 %indvars.iv33 to i32 + %add.us = add i32 %3, %k + 
%arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33 + br label %for.body3.us + +for.end15: ; preds = %for.end.us, %entry + ret void +} + +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!3 = metadata !{metadata !4, metadata !5} +!4 = metadata !{metadata !4} +!5 = metadata !{metadata !5} + diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll index 186fba8..8716cff 100644 --- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll +++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll @@ -11,9 +11,9 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %add = fadd float %0, 1.000000e+00 - store float %add, float* %arrayidx, align 4, !tbaa !0 + store float %add, float* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 8 @@ -22,7 +22,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll index 452d0df..f904a8e 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll +++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll @@ -19,19 +19,19 @@ entry: for.body: ; preds = %for.body.for.body_crit_edge, %entry 
%indvars.iv.reload = load i64* %indvars.iv.reg2mem %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next = add i64 %indvars.iv.reload, 1 ; A new store without the parallel metadata here: store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1 - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 @@ -46,7 +46,4 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll index f648722..3f1a071 100644 --- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ 
b/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -21,16 +21,16 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0 + %1 = load i32* %arrayidx2, align 4 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 - store i32 %0, i32* %arrayidx4, align 4, !tbaa !0 + store i32 %0, i32* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0 + %2 = load i32* %arrayidx6, align 4 + store i32 %2, i32* %arrayidx2, align 4 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body @@ -51,18 +51,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This store might have originated from inlining a function with a parallel ; loop. Refers to a list with the "original loop reference" (!4) also included. 
- store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3 @@ -84,18 +84,18 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6 %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %idxprom3 = sext i32 %1 to i64 %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3 ; This refers to the loop marked with !7 which we are not in at the moment. ; It should prevent detecting as a parallel loop. 
- store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7 + store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7 %indvars.iv.next = add i64 %indvars.iv, 1 %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next - %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 - store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6 + %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6 + store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, 512 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6 @@ -104,9 +104,6 @@ for.end: ; preds = %for.body ret void } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{metadata !3} !4 = metadata !{metadata !4} !5 = metadata !{metadata !3, metadata !4} diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll new file mode 100644 index 0000000..b66119f --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -0,0 +1,29 @@ +; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +@x = common global [1024 x x86_fp80] zeroinitializer, align 16 + +;CHECK: @example +;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>* +;CHECK: store +;CHECK: ret void + +define void @example() nounwind ssp uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, 
%entry ], [ %indvars.iv.next, %for.body ] + %conv = sitofp i32 1 to x86_fp80 + %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv + store x86_fp80 %conv, x86_fp80* %arrayidx, align 16 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll new file mode 100644 index 0000000..a14b92d --- /dev/null +++ b/test/Transforms/LoopVectorize/bsd_regex.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +;PR 15830. + +;CHECK: foo +; When scalarizing stores we need to preserve the original order. +; Make sure that we are extracting in the correct order (0101, and not 0011). 
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: extractelement <2 x i64> {{.*}}, i32 0 +;CHECK: extractelement <2 x i64> {{.*}}, i32 1 +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: store +;CHECK: ret + +define i32 @foo(i32* nocapture %A) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = shl nsw i64 %indvars.iv, 2 + %arrayidx = getelementptr inbounds i32* %A, i64 %0 + store i32 4, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} + + diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll index 431e422..2648bbe 100644 --- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll +++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll @@ -17,7 +17,7 @@ do.body: ; preds = %cond.end, %entry %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ] %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ] %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1 - %0 = load i16* %incdec.ptr, align 2, !tbaa !0 + %0 = load i16* %incdec.ptr, align 2 %conv = zext i16 %0 to i32 %cmp = icmp ult i32 %conv, %size br i1 %cmp, label %cond.end, label %cond.true @@ -29,7 +29,7 @@ cond.true: ; preds = %do.body cond.end: ; preds = %do.body, %cond.true %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ] - store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0 + store i16 %cond, i16* %incdec.ptr, align 2 %dec = add i32 %n.addr.0, -1 %tobool = icmp eq i32 %dec, 0 br i1 %tobool, label %do.end, label %do.body @@ -52,11 +52,11 @@ do.body: ; preds = %do.body, %entry %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ] %incdec.ptr = 
getelementptr inbounds i32* %p.0, i64 -1 - %0 = load i32* %incdec.ptr, align 4, !tbaa !3 + %0 = load i32* %incdec.ptr, align 4 %cmp = icmp slt i32 %0, %wsize %sub = sub nsw i32 %0, %wsize %cond = select i1 %cmp, i32 0, i32 %sub - store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3 + store i32 %cond, i32* %incdec.ptr, align 4 %dec = add nsw i32 %n.addr.0, -1 %tobool = icmp eq i32 %dec, 0 br i1 %tobool, label %do.end, label %do.body @@ -64,8 +64,3 @@ do.body: ; preds = %do.body, %entry do.end: ; preds = %do.body ret void } - -!0 = metadata !{metadata !"short", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll index 08c84ef..7e79916 100644 --- a/test/Transforms/LoopVectorize/calloc.ll +++ b/test/Transforms/LoopVectorize/calloc.ll @@ -23,7 +23,7 @@ for.body: ; preds = %for.body, %for.body %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] %shr = lshr i64 %i.030, 1 %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr - %1 = load i8* %arrayidx, align 1, !tbaa !0 + %1 = load i8* %arrayidx, align 1 %conv = zext i8 %1 to i32 %and = shl i64 %i.030, 2 %neg = and i64 %and, 4 @@ -38,7 +38,7 @@ for.body: ; preds = %for.body, %for.body %add17 = add nsw i32 %cond, %shr11 %conv18 = trunc i32 %add17 to i8 %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030 - store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0 + store i8 %conv18, i8* %arrayidx19, align 1 %inc = add i64 %i.030, 1 %exitcond = icmp eq i64 %inc, %0 br i1 %exitcond, label %for.end, label %for.body @@ -48,6 +48,3 @@ for.end: ; preds = %for.body, %entry } declare noalias i8* @calloc(i64, i64) nounwind - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/dbg.value.ll 
b/test/Transforms/LoopVectorize/dbg.value.ll index a2ea951..127d479 100644 --- a/test/Transforms/LoopVectorize/dbg.value.ll +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -18,12 +18,12 @@ for.body: ;CHECK: load <4 x i32> %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19 - %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21 + %0 = load i32* %arrayidx, align 4, !dbg !19 %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19 - %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21 + %1 = load i32* %arrayidx2, align 4, !dbg !19 %add = add nsw i32 %1, %0, !dbg !19 %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19 - store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21 + store i32 %add, i32* %arrayidx4, align 4, !dbg !19 %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18 tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18 %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18 @@ -64,7 +64,4 @@ attributes #1 = { nounwind readnone } !18 = metadata !{i32 6, i32 0, metadata !10, null} !19 = metadata !{i32 7, i32 0, metadata !20, null} !20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1} -!21 = metadata !{metadata !"int", metadata !22} -!22 = metadata !{metadata !"omnipotent char", metadata !23} -!23 = metadata !{metadata !"Simple C/C++ TBAA"} !24 = metadata !{i32 9, i32 0, metadata !3, null} diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll index 565684c..54ca172 100644 --- a/test/Transforms/LoopVectorize/float-reduction.ll +++ b/test/Transforms/LoopVectorize/float-reduction.ll @@ -13,7 +13,7 @@ for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] 
%arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %add = fadd fast float %sum.04, %0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -23,7 +23,3 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret float %add } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll index 7759b70..2a0e826 100644 --- a/test/Transforms/LoopVectorize/i8-induction.ll +++ b/test/Transforms/LoopVectorize/i8-induction.ll @@ -8,8 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0" define void @f() nounwind uwtable ssp { scalar.ph: - store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0 - %0 = load i8* @a, align 1, !tbaa !0 + store i8 0, i8* inttoptr (i64 1 to i8*), align 1 + %0 = load i8* @a, align 1 br label %for.body for.body: @@ -26,10 +26,6 @@ for.body: br i1 %phitmp14, label %for.body, label %for.end for.end: ; preds = %for.body - store i8 %mul, i8* @b, align 1, !tbaa !0 + store i8 %mul, i8* @b, align 1 ret void } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA"} - diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll index e79d78d..defbb5b 100644 --- a/test/Transforms/LoopVectorize/intrinsic.ll +++ b/test/Transforms/LoopVectorize/intrinsic.ll @@ -14,10 +14,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone %arrayidx2 = 
getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -40,10 +40,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -66,10 +66,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.sin.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -92,10 +92,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.sin.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, 
i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -118,10 +118,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.cos.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -144,10 +144,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.cos.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -170,10 +170,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.exp.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float 
%call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -196,10 +196,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.exp.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -222,10 +222,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -248,10 +248,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, 
align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -274,10 +274,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.log.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -300,10 +300,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.log.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -326,10 +326,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.log10.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float 
%call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -352,10 +352,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.log10.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -378,10 +378,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.log2.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -404,10 +404,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.log2.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* 
%arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -430,10 +430,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -453,10 +453,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.fabs(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -479,10 +479,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.floor.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next 
= add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -505,10 +505,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.floor.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -531,10 +531,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -557,10 +557,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 
1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -583,10 +583,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -609,10 +609,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -635,10 +635,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.rint.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 
%indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -661,10 +661,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.rint.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -687,10 +687,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx2, align 4, !tbaa !0 + store float %call, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -713,10 +713,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx2, align 8, !tbaa !3 + store double %call, double* %arrayidx2, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 
%indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -739,14 +739,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv - %2 = load float* %arrayidx4, align 4, !tbaa !0 + %2 = load float* %arrayidx4, align 4 %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1) %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %3, float* %arrayidx6, align 4, !tbaa !0 + store float %3, float* %arrayidx6, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -769,14 +769,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 8 %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv - %2 = load double* %arrayidx4, align 8, !tbaa !3 + %2 = load double* %arrayidx4, align 8 %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1) %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %3, double* %arrayidx6, align 8, !tbaa !3 + store double %3, double* %arrayidx6, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -799,14 +799,14 @@ 
entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv - %2 = load float* %arrayidx4, align 4, !tbaa !0 + %2 = load float* %arrayidx4, align 4 %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1) %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %3, float* %arrayidx6, align 4, !tbaa !0 + store float %3, float* %arrayidx6, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -829,14 +829,14 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 8 %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv - %2 = load double* %arrayidx4, align 8, !tbaa !3 + %2 = load double* %arrayidx4, align 8 %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1) %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %3, double* %arrayidx6, align 8, !tbaa !3 + store double %3, double* %arrayidx6, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -859,12 +859,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 
%indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv - %1 = load float* %arrayidx2, align 4, !tbaa !0 + %1 = load float* %arrayidx2, align 4 %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv - store float %call, float* %arrayidx4, align 4, !tbaa !0 + store float %call, float* %arrayidx4, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -887,12 +887,12 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv - %0 = load double* %arrayidx, align 8, !tbaa !3 + %0 = load double* %arrayidx, align 8 %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv - %1 = load double* %arrayidx2, align 8, !tbaa !3 + %1 = load double* %arrayidx2, align 8 %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv - store double %call, double* %arrayidx4, align 8, !tbaa !3 + store double %call, double* %arrayidx4, align 8 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -927,9 +927,3 @@ for.end: ; preds = %for.body declare float @fabsf(float) nounwind readnone declare double @llvm.pow.f64(double, double) nounwind readnone - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} -!3 = metadata !{metadata !"double", metadata !1} -!4 = metadata !{metadata !"int", metadata !1} diff --git 
a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll new file mode 100644 index 0000000..99dd093 --- /dev/null +++ b/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -0,0 +1,402 @@ +; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +@A = common global [1024 x i32] zeroinitializer, align 16 + +; Signed tests. + +; Turn this into a max reduction. +; CHECK: @max_red +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @max_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp sgt i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a max reduction. The compare has its operands swapped and its +; predicate inverted (slt max, x); the select is unchanged, so this is still a max. 
+; CHECK: @max_red_inverse_select +; CHECK: icmp slt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @max_red_inverse_select(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp slt i32 %max.red.08, %0 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a min reduction. +; CHECK: @min_red +; CHECK: icmp slt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp slt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @min_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp slt i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a min reduction. The compare has its operands swapped and its +; predicate inverted (sgt max, x); the select is unchanged, so this is still a min. 
+; CHECK: @min_red_inverse_select +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp slt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @min_red_inverse_select(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp sgt i32 %max.red.08, %0 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Unsigned tests. + +; Turn this into a max reduction. +; CHECK: @umax_red +; CHECK: icmp ugt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ugt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @umax_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp ugt i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a max reduction. The compare has its operands swapped and its +; predicate inverted (ult max, x); the select is unchanged, so this is still a max. 
+; CHECK: @umax_red_inverse_select +; CHECK: icmp ult <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ugt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @umax_red_inverse_select(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp ult i32 %max.red.08, %0 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a min reduction. +; CHECK: @umin_red +; CHECK: icmp ult <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ult <2 x i32> +; CHECK: select <2 x i1> + +define i32 @umin_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp ult i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; Turn this into a min reduction. The compare has its operands swapped and its +; predicate inverted (ugt max, x); the select is unchanged, so this is still a min. 
+; CHECK: @umin_red_inverse_select +; CHECK: icmp ugt <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ult <2 x i32> +; CHECK: select <2 x i1> + +define i32 @umin_red_inverse_select(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp ugt i32 %max.red.08, %0 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; SGE -> SLT +; Turn this into a min reduction (select inputs are reversed). +; CHECK: @sge_min_red +; CHECK: icmp sge <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp slt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @sge_min_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp sge i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; SLE -> SGT +; Turn this into a max reduction (select inputs are reversed). NOTE: despite its name, @sle_min_red computes a max. 
+; CHECK: @sle_min_red +; CHECK: icmp sle <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp sgt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @sle_min_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp sle i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; UGE -> ULT +; Turn this into a min reduction (select inputs are reversed). +; CHECK: @uge_min_red +; CHECK: icmp uge <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ult <2 x i32> +; CHECK: select <2 x i1> + +define i32 @uge_min_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp uge i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; ULE -> UGT +; Turn this into a max reduction (select inputs are reversed). NOTE: despite its name, @ule_min_red computes a max. 
+; CHECK: @ule_min_red +; CHECK: icmp ule <2 x i32> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: icmp ugt <2 x i32> +; CHECK: select <2 x i1> + +define i32 @ule_min_red(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp3 = icmp ule i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; No reduction: the compare does not use the recurrence %max.red.08. +; CHECK: @no_red_1 +; CHECK-NOT: icmp {{.*}} <2 x i32> +define i32 @no_red_1(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %1 = load i32* %arrayidx1, align 4 + %cmp3 = icmp sgt i32 %0, %1 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} + +; CHECK: @no_red_2 +; CHECK-NOT: icmp {{.*}} <2 x i32> +define i32 @no_red_2(i32 %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv + %arrayidx1 = 
getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %1 = load i32* %arrayidx1, align 4 + %cmp3 = icmp sgt i32 %0, %max.red.08 + %max.red.0 = select i1 %cmp3, i32 %0, i32 %1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %max.red.0 +} diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll new file mode 100644 index 0000000..cdfb3fd --- /dev/null +++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll @@ -0,0 +1,24 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s +@a = common global [128 x i32] zeroinitializer, align 16 + +;; Must not vectorize a division reduction: integer division is lossy and not associative. +define i32 @g() { +entry: + br label %for.body + +for.body: + ; CHECK: @g + ; CHECK-NOT: sdiv <2 x i32> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ] + %arrayidx = getelementptr inbounds [128 x i32]* @a, i64 0, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %div = sdiv i32 %r.05, %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 %div +} diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll index b80d459..bbce239 100644 --- a/test/Transforms/LoopVectorize/phi-hang.ll +++ b/test/Transforms/LoopVectorize/phi-hang.ll @@ -27,3 +27,21 @@ bb5: ; preds = %bb4, %bb1 bb11: ; preds = %bb5 ret void } + +; PR15748 +define void @test2() { +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ] + %tmp2 = phi i32 [ 0, %bb ], [ 1, %bb1 ] + %tmp3 = phi 
i32 [ 0, %bb ], [ %tmp4, %bb1 ] + %tmp4 = or i32 %tmp2, %tmp3 + %tmp5 = add nsw i32 %tmp, 1 + %tmp6 = icmp eq i32 %tmp5, 0 + br i1 %tmp6, label %bb7, label %bb1 + +bb7: ; preds = %bb1 + ret void +} diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll new file mode 100644 index 0000000..4145d13 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +;CHECK: add_ints +;CHECK: br +;CHECK: getelementptr +;CHECK-NEXT: getelementptr +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: icmp uge +;CHECK-NEXT: and +;CHECK: ret +define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %indvars.iv + %1 = load i32* %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx4, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 200 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll index 86098a6..014c4fc 100644 --- a/test/Transforms/LoopVectorize/runtime-check.ll +++ b/test/Transforms/LoopVectorize/runtime-check.ll @@ -22,10 
+22,10 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv - %0 = load float* %arrayidx, align 4, !tbaa !0 + %0 = load float* %arrayidx, align 4 %mul = fmul float %0, 3.000000e+00 %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv - store float %mul, float* %arrayidx2, align 4, !tbaa !0 + store float %mul, float* %arrayidx2, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n @@ -34,7 +34,3 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret i32 undef } - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll new file mode 100644 index 0000000..d783974 --- /dev/null +++ b/test/Transforms/LoopVectorize/runtime-limit.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; We are vectorizing with 6 runtime checks. 
+;CHECK: func1x6 +;CHECK: <4 x i32> +;CHECK: ret +define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32* %A, i64 %i.016 + %0 = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.016 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.016 + %2 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %add, %2 + %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.016 + %3 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %add3, %3 + %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.016 + %4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %add5, %4 + %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.016 + store i32 %add7, i32* %arrayidx8, align 4 + %inc = add i64 %i.016, 1 + %exitcond = icmp eq i64 %inc, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 undef +} + +; We are not vectorizing with 12 runtime checks. 
+;CHECK: func2x6 +;CHECK-NOT: <4 x i32> +;CHECK: ret +define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32* %A, i64 %i.037 + %0 = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.037 + %1 = load i32* %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.037 + %2 = load i32* %arrayidx2, align 4 + %add3 = add nsw i32 %add, %2 + %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.037 + %3 = load i32* %arrayidx4, align 4 + %add5 = add nsw i32 %add3, %3 + %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.037 + %4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %add5, %4 + %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.037 + store i32 %add7, i32* %arrayidx8, align 4 + %5 = load i32* %arrayidx, align 4 + %6 = load i32* %arrayidx1, align 4 + %add11 = add nsw i32 %6, %5 + %7 = load i32* %arrayidx2, align 4 + %add13 = add nsw i32 %add11, %7 + %8 = load i32* %arrayidx4, align 4 + %add15 = add nsw i32 %add13, %8 + %9 = load i32* %arrayidx6, align 4 + %add17 = add nsw i32 %add15, %9 + %arrayidx18 = getelementptr inbounds i32* %out2, i64 %i.037 + store i32 %add17, i32* %arrayidx18, align 4 + %inc = add i64 %i.037, 1 + %exitcond = icmp eq i64 %inc, 256 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 undef +} + diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll index 998001c..e8a089a 100644 --- a/test/Transforms/LoopVectorize/start-non-zero.ll +++ b/test/Transforms/LoopVectorize/start-non-zero.ll @@ -18,9 +18,9 @@ for.body.lr.ph: ; preds = %entry for.body: ; preds = %for.body.lr.ph, %for.body 
%indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %1 = load i32* %arrayidx, align 4, !tbaa !0 + %1 = load i32* %arrayidx, align 4 %mul = mul nuw i32 %1, 333 - store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 + store i32 %mul, i32* %arrayidx, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %2 = trunc i64 %indvars.iv.next to i32 %cmp = icmp slt i32 %2, %end @@ -29,7 +29,3 @@ for.body: ; preds = %for.body.lr.ph, %fo for.end: ; preds = %for.body, %entry ret i32 4 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index de65d0d..573480d 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -33,7 +33,7 @@ for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0 - %0 = load i32* %x, align 4, !tbaa !0 + %0 = load i32* %x, align 4 %add = add nsw i32 %0, %sum.05 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -44,7 +44,3 @@ for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ret i32 %sum.0.lcssa } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll index ac16948..f289ded 100644 --- a/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -29,7 +29,7 @@ entry: for.body.i: ; preds = %entry, 
%for.body.i %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] - %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %0 = load i32* %__first.addr.04.i, align 4 %add.i = add nsw i32 %0, %__init.addr.05.i %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr @@ -55,7 +55,7 @@ entry: for.body.i: ; preds = %entry, %for.body.i %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] - %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %0 = load i32* %__first.addr.04.i, align 4 %add.i = add nsw i32 %0, %__init.addr.05.i %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr @@ -68,8 +68,5 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} !3 = metadata !{} |
