Diffstat (limited to 'test/Transforms')
61 files changed, 1910 insertions, 543 deletions
diff --git a/test/Transforms/BBVectorize/no-ldstr-conn.ll b/test/Transforms/BBVectorize/no-ldstr-conn.ll
new file mode 100644
index 0000000..ada2a71
--- /dev/null
+++ b/test/Transforms/BBVectorize/no-ldstr-conn.ll
@@ -0,0 +1,23 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
+
+; Make sure that things (specifically getelementptr) are not connected to loads
+; and stores via the address operand (which would be bad because the address
+; is really a scalar even after vectorization)
+define i64 @test2(i64 %a) nounwind uwtable readonly {
+entry:
+  %a1 = inttoptr i64 %a to i64*
+  %a2 = getelementptr i64* %a1, i64 1
+  %a3 = getelementptr i64* %a1, i64 2
+  %v2 = load i64* %a2, align 8
+  %v3 = load i64* %a3, align 8
+  %v2a = add i64 %v2, 5
+  %v3a = add i64 %v3, 7
+  store i64 %v2a, i64* %a2, align 8
+  store i64 %v3a, i64* %a3, align 8
+  %r = add i64 %v2, %v3
+  ret i64 %r
+; CHECK: @test2
+; CHECK-NOT: getelementptr <2 x i64*>
+}
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index b2ef27b..6844977 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -3,6 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 declare double @llvm.fma.f64(double, double, double)
 declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
 
 ; Basic depth-3 chain with fma
 define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
@@ -54,6 +55,49 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: ret double %R
 }
 
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+  %X1 = fsub double %A1, %B1
+  %X2 = fsub double %A2, %B2
+  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+  %Z1 = fadd double %Y1, %B1
+  %Z2 = fadd double %Y2, %B2
+  %R = fmul double %Z1, %Z2
+  ret double %R
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+  %X1 = fsub double %A1, %B1
+  %X2 = fsub double %A2, %B2
+  %P2 = add i32 %P, 1
+  %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+  %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+  %Z1 = fadd double %Y1, %B1
+  %Z2 = fadd double %Y2, %B2
+  %R = fmul double %Z1, %Z2
+  ret double %R
+; CHECK: @test4
+; CHECK-NOT: <2 x double>
+; CHECK: ret double %R
+}
+
 ; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 ; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
new file mode 100644
index 0000000..f992d41
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
@@ -0,0 +1,81 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+
+; Simple 3-pair chain also with loads and stores (using ptrs and gep)
+define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
+entry:
+  %i0 = load i64* %a, align 8
+  %i1 = load i64* %b, align 8
+  %mul = mul i64 %i0, %i1
+  %arrayidx3 = getelementptr inbounds i64* %a, i64 1
+  %i3 = load i64* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds i64* %b, i64 1
+  %i4 = load i64* %arrayidx4, align 8
+  %mul5 = mul i64 %i3, %i4
+  %ptr = inttoptr i64 %mul to double*
+  %ptr5 = inttoptr i64 %mul5 to double*
+  %aptr = getelementptr inbounds double* %ptr, i64 2
+  %aptr5 = getelementptr inbounds double* %ptr5, i64 3
+  %av = load double* %aptr, align 16
+  %av5 = load double* %aptr5, align 16
+  %r = fmul double %av, %av5
+  store i64 %mul, i64* %c, align 8
+  %arrayidx5 = getelementptr inbounds i64* %c, i64 1
+  store i64 %mul5, i64* %arrayidx5, align 8
+  ret double %r
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
+; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
+; CHECK: %i0 = load <2 x i64>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x i64>* %i1.v.i0, align 8
+; CHECK: %mul = mul <2 x i64> %i0, %i1
+; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*>
+; CHECK: %aptr = getelementptr inbounds <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>
+; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0
+; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1
+; CHECK: %av = load double* %aptr.v.r1, align 16
+; CHECK: %av5 = load double* %aptr.v.r2, align 16
+; CHECK: %r = fmul double %av, %av5
+; CHECK: %0 = bitcast i64* %c to <2 x i64>*
+; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
+; CHECK: ret double %r
+; CHECK-AO: @test1
+; CHECK-AO-NOT: load <2 x
+}
+
+; Simple 3-pair chain with loads and stores (using ptrs and gep)
+define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly {
+entry:
+  %i0 = load i64** %a, align 8
+  %i1 = load i64** %b, align 8
+  %arrayidx3 = getelementptr inbounds i64** %a, i64 1
+  %i3 = load i64** %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds i64** %b, i64 1
+  %i4 = load i64** %arrayidx4, align 8
+  %o1 = load i64* %i1, align 8
+  %o4 = load i64* %i4, align 8
+  %ptr0 = getelementptr inbounds i64* %i0, i64 %o1
+  %ptr3 = getelementptr inbounds i64* %i3, i64 %o4
+  store i64* %ptr0, i64** %c, align 8
+  %arrayidx5 = getelementptr inbounds i64** %c, i64 1
+  store i64* %ptr3, i64** %arrayidx5, align 8
+  ret void
+; CHECK: @test2
+; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
+; CHECK: %i1 = load i64** %b, align 8
+; CHECK: %i0 = load <2 x i64*>* %i0.v.i0, align 8
+; CHECK: %arrayidx4 = getelementptr inbounds i64** %b, i64 1
+; CHECK: %i4 = load i64** %arrayidx4, align 8
+; CHECK: %o1 = load i64* %i1, align 8
+; CHECK: %o4 = load i64* %i4, align 8
+; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
+; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
+; CHECK: %ptr0 = getelementptr inbounds <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2
+; CHECK: %0 = bitcast i64** %c to <2 x i64*>*
+; CHECK: store <2 x i64*> %ptr0, <2 x i64*>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test2
+; CHECK-AO-NOT: <2 x
+}
+
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
new file mode 100644
index 0000000..4daa571
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -0,0 +1,30 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain with select
+define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+  %X1 = fsub double %A1, %B1
+  %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+  %Y1 = fmul double %X1, %A1
+  %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+  %Z1 = select i1 %C1, double %Y1, double %B1
+  %Z2 = select i1 %C2, double %Y2, double %B2
+; CHECK: %Z1.v.i0.1 = insertelement <2 x i1> undef, i1 %C1, i32 0
+; CHECK: %Z1.v.i0.2 = insertelement <2 x i1> %Z1.v.i0.1, i1 %C2, i32 1
+; CHECK: %Z1 = select <2 x i1> %Z1.v.i0.2, <2 x double> %Y1, <2 x double> %X1.v.i1.2
+  %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+  ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/GVN/pre-compare.ll b/test/Transforms/GVN/pre-compare.ll
new file mode 100644
index 0000000..18d0c2e
--- /dev/null
+++ b/test/Transforms/GVN/pre-compare.ll
@@ -0,0 +1,68 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; C source:
+;
+;   void f(int x) {
+;     if (x != 1)
+;       puts (x == 2 ? "a" : "b");
+;     for (;;) {
+;       puts("step 1");
+;       if (x == 2)
+;         continue;
+;       printf("step 2: %d\n", x);
+;     }
+;   }
+;
+; If we PRE %cmp3, CodeGenPrepare won't be able to sink the compare down to its
+; uses, and we are forced to keep both %x and %cmp3 in registers in the loop.
+;
+; It is just as cheap to recompute the icmp against %x as it is to compare a
+; GPR against 0. On x86-64, the br i1 %cmp3 becomes:
+;
+;   testb %r12b, %r12b
+;   jne LBB0_3
+;
+; The sunk icmp is:
+;
+;   cmpl $2, %ebx
+;   je LBB0_3
+;
+; This is just as good, and it doesn't require a separate register.
+;
+; CHECK-NOT: phi i1
+
+@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+@.str2 = private unnamed_addr constant [7 x i8] c"step 1\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1
+
+define void @f(i32 %x) noreturn nounwind uwtable ssp {
+entry:
+  %cmp = icmp eq i32 %x, 1
+  br i1 %cmp, label %for.cond.preheader, label %if.then
+
+if.then: ; preds = %entry
+  %cmp1 = icmp eq i32 %x, 2
+  %cond = select i1 %cmp1, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)
+  %call = tail call i32 @puts(i8* %cond) nounwind
+  br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry, %if.then
+  %cmp3 = icmp eq i32 %x, 2
+  br label %for.cond
+
+for.cond: ; preds = %for.cond.backedge, %for.cond.preheader
+  %call2 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) nounwind
+  br i1 %cmp3, label %for.cond.backedge, label %if.end5
+
+if.end5: ; preds = %for.cond
+  %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind
+  br label %for.cond.backedge
+
+for.cond.backedge: ; preds = %if.end5, %for.cond
+  br label %for.cond
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
new file mode 100644
index 0000000..d613601
--- /dev/null
+++ b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+@zero = internal global [10 x i32] zeroinitializer
+
+define i32 @test1(i64 %idx) nounwind {
+  %arrayidx = getelementptr inbounds [10 x i32]* @zero, i64 0, i64 %idx
+  %l = load i32* %arrayidx
+  ret i32 %l
+; CHECK: @test1
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index 4ad63aa..af9f1b3 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
 ; add219 should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
diff --git a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index c7809b8..c0c508f 100644
--- a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -1,7 +1,5 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite=false "-default-data-layout=e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | FileCheck %s
-; RUN: opt < %s -indvars -S -enable-iv-rewrite=true "-default-data-layout=e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | FileCheck %s
-; RUN: opt < %s -indvars -S -enable-iv-rewrite=false "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" | FileCheck %s
-; RUN: opt < %s -indvars -S -enable-iv-rewrite=true "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" | FileCheck %s
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | FileCheck %s
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" | FileCheck %s
 ;
 ; PR11279: Assertion !IVLimit->getType()->isPointerTy()
 ;
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 154de6f..c093298 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 ;
 ; PR1301
diff --git a/test/Transforms/IndVarSimplify/complex-scev.ll b/test/Transforms/IndVarSimplify/complex-scev.ll
deleted file mode 100644
index 395377e..0000000
--- a/test/Transforms/IndVarSimplify/complex-scev.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; The i induction variable looks like a wrap-around, but it really is just
-; a simple affine IV.  Make sure that indvars eliminates it.
-
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: phi
-; CHECK-NOT: phi
-
-define void @foo() {
-entry:
-  br label %bb6
-
-bb6: ; preds = %cond_true, %entry
-  %j.0 = phi i32 [ 1, %entry ], [ %tmp5, %cond_true ] ; <i32> [#uses=3]
-  %i.0 = phi i32 [ 0, %entry ], [ %j.0, %cond_true ] ; <i32> [#uses=1]
-  %tmp7 = call i32 (...)* @foo2( ) ; <i32> [#uses=1]
-  %tmp = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
-  br i1 %tmp, label %cond_true, label %return
-
-cond_true: ; preds = %bb6
-  %tmp2 = call i32 (...)* @bar( i32 %i.0, i32 %j.0 ) ; <i32> [#uses=0]
-  %tmp5 = add i32 %j.0, 1 ; <i32> [#uses=1]
-  br label %bb6
-
-return: ; preds = %bb6
-  ret void
-}
-
-declare i32 @bar(...)
-
-declare i32 @foo2(...)
-
diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll
index 43c162f..ad5679f 100644
--- a/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
deleted file mode 100644
index 7e1e2a3..0000000
--- a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: define void @foo
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-; CHECK: define void @bar
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-
-define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
-entry:
-  %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
-  br i1 %tmp, label %bb.nph, label %return
-
-bb.nph: ; preds = %entry
-  %tmp1 = mul i64 %n, 37 ; <i64> [#uses=1]
-  %tmp2 = mul i64 %tmp1, %m ; <i64> [#uses=1]
-  %tmp3 = mul i64 %tmp2, %o ; <i64> [#uses=1]
-  br label %bb
-
-bb: ; preds = %bb, %bb.nph
-  %i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ] ; <i64> [#uses=3]
-  %tmp9 = getelementptr double* %p, i64 %i.01 ; <double*> [#uses=1]
-  %tmp10 = load double* %tmp9, align 8 ; <double> [#uses=1]
-  %tmp11 = fdiv double %tmp10, 2.100000e+00 ; <double> [#uses=1]
-  store double %tmp11, double* %tmp9, align 8
-  %tmp13 = add i64 %i.01, 1 ; <i64> [#uses=2]
-  %tmp14 = icmp slt i64 %tmp13, %n ; <i1> [#uses=1]
-  br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit: ; preds = %bb
-  br label %return
-
-return: ; preds = %return.loopexit, %entry
-  ret void
-}
-define void @bar(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
-entry:
-  %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
-  br i1 %tmp, label %bb.nph, label %return
-
-bb.nph: ; preds = %entry
-  %tmp1 = mul i64 %n, %q ; <i64> [#uses=1]
-  %tmp2 = mul i64 %tmp1, %m ; <i64> [#uses=1]
-  %tmp3 = mul i64 %tmp2, %o ; <i64> [#uses=1]
-  br label %bb
-
-bb: ; preds = %bb, %bb.nph
-  %i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ] ; <i64> [#uses=3]
-  %tmp9 = getelementptr double* %p, i64 %i.01 ; <double*> [#uses=1]
-  %tmp10 = load double* %tmp9, align 8 ; <double> [#uses=1]
-  %tmp11 = fdiv double %tmp10, 2.100000e+00 ; <double> [#uses=1]
-  store double %tmp11, double* %tmp9, align 8
-  %tmp13 = add i64 %i.01, 1 ; <i64> [#uses=2]
-  %tmp14 = icmp slt i64 %tmp13, %n ; <i1> [#uses=1]
-  br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit: ; preds = %bb
-  br label %return
-
-return: ; preds = %return.loopexit, %entry
-  ret void
-}
diff --git a/test/Transforms/IndVarSimplify/iv-fold.ll b/test/Transforms/IndVarSimplify/iv-fold.ll
index 2e19118..e0b05cd 100644
--- a/test/Transforms/IndVarSimplify/iv-fold.ll
+++ b/test/Transforms/IndVarSimplify/iv-fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 646e6c0..2e0f70c 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 ; CHECK-NOT: and
 ; CHECK-NOT: zext
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 490eee9..9abfe13 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ;
 ; Make sure that indvars can perform LFTR without a canonical IV.
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 23fdc87..bfdd000 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
 ;
 ; Make sure that indvars isn't inserting canonical IVs.
 ; This is kinda hard to do until linear function test replacement is removed.
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
deleted file mode 100644
index cdcaaa0..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; No explicit integer multiplications!
-; No i8* arithmetic or pointer casting anywhere!
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-; Exactly one getelementptr for each load+store.
-; Each getelementptr using %struct.Q* %s as a base and not i8*.
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-
-; FIXME: This test should pass with or without TargetData. Until opt
-; supports running tests without targetdata, just hardware this in.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-
-%struct.Q = type { [10 x %struct.N] }
-%struct.N = type { %struct.S }
-%struct.S = type { [100 x double], [100 x double] }
-
-define void @foo(%struct.Q* %s, i64 %n) nounwind {
-entry:
-  br label %bb1
-
-bb1:
-  %i = phi i64 [ 2, %entry ], [ %i.next, %bb ]
-  %j = phi i64 [ 0, %entry ], [ %j.next, %bb ]
-  %t5 = icmp slt i64 %i, %n
-  br i1 %t5, label %bb, label %return
-
-bb:
-  %t0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
-  %t1 = load double* %t0, align 8
-  %t2 = fmul double %t1, 3.200000e+00
-  %t3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
-  store double %t2, double* %t3, align 8
-
-  %s0 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
-  %s1 = load double* %s0, align 8
-  %s2 = fmul double %s1, 3.200000e+00
-  %s3 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
-  store double %s2, double* %s3, align 8
-
-  %u0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
-  %u1 = load double* %u0, align 8
-  %u2 = fmul double %u1, 3.200000e+00
-  %u3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
-  store double %u2, double* %u3, align 8
-
-  %v0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
-  %v1 = load double* %v0, align 8
-  %v2 = fmul double %v1, 3.200000e+00
-  %v3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
-  store double %v2, double* %v3, align 8
-
-  %w0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
-  %w1 = load double* %w0, align 8
-  %w2 = fmul double %w1, 3.200000e+00
-  %w3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
-  store double %w2, double* %w3, align 8
-
-  %x0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
-  %x1 = load double* %x0, align 8
-  %x2 = fmul double %x1, 3.200000e+00
-  %x3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
-  store double %x2, double* %x3, align 8
-
-  %i.next = add i64 %i, 1
-  %j.next = add i64 %j, 1
-  br label %bb1
-
-return:
-  ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
deleted file mode 100644
index a62943d..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %0, i64 1
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
-
-; Indvars shouldn't expand this to
-; %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
-; or something. That's valid, but more obscure.
-
-define void @foo([3 x [3 x double]]* noalias %p) nounwind {
-entry:
-  br label %loop
-
-loop:
-  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-  %ip = add i64 %i, 1
-  %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %ip, i64 1
-  store volatile double 0.0, double* %p.2.ip.1
-  %i.next = add i64 %i, 1
-  br label %loop
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep.ll b/test/Transforms/IndVarSimplify/preserve-gep.ll
deleted file mode 100644
index fec8a28..0000000
--- a/test/Transforms/IndVarSimplify/preserve-gep.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK-NOT: {{ptrtoint|inttoptr}}
-; CHECK: getelementptr
-; CHECK-NOT: {{ptrtoint|inttoptr|getelementptr}}
-
-; Indvars shouldn't leave getelementptrs expanded out as
-; inttoptr+ptrtoint in its output in common cases.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-target triple = "x86_64-unknown-linux-gnu"
-  %struct.Foo = type { i32, i32, [10 x i32], i32 }
-
-define void @me(%struct.Foo* nocapture %Bar) nounwind {
-entry:
-  br i1 false, label %return, label %bb.nph
-
-bb.nph: ; preds = %entry
-  br label %bb
-
-bb: ; preds = %bb1, %bb.nph
-  %i.01 = phi i64 [ %4, %bb1 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
-  %0 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 4 ; <i32> [#uses=1]
-  %2 = mul i32 %1, 113 ; <i32> [#uses=1]
-  %3 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3 ; <i32*> [#uses=1]
-  store i32 %2, i32* %3, align 4
-  %4 = add i64 %i.01, 1 ; <i64> [#uses=2]
-  br label %bb1
-
-bb1: ; preds = %bb
-  %phitmp = icmp sgt i64 %4, 19999 ; <i1> [#uses=1]
-  br i1 %phitmp, label %bb1.return_crit_edge, label %bb
-
-bb1.return_crit_edge: ; preds = %bb1
-  br label %return
-
-return: ; preds = %bb1.return_crit_edge, %entry
-  ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
index 22e2092..f619e8d 100644
--- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
+++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
 
 ; Indvars should insert a 64-bit induction variable to eliminate the
 ; sext for the addressing, however it shouldn't eliminate the sext
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index fc906cd..fb9ef22 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -1,5 +1,4 @@
 ; RUN: opt < %s -indvars -instcombine -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -instcombine -S | FileCheck %s
 ;
 ; Test that -indvars can reduce variable stride IVs. If it can reduce variable
 ; stride iv's, it will make %iv. and %m.0.0 isomorphic to each other without
diff --git a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll b/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
deleted file mode 100644
index 39095c4..0000000
--- a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -inline-threshold=0 -inline -S | not grep call
-
-define i32 @fn2() alwaysinline {
-  ret i32 1
-}
-
-define i32 @fn3() {
-  %r = call i32 @fn2()
-  ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll b/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
deleted file mode 100644
index 11e5012..0000000
--- a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -always-inline -S | not grep call
-
-; Ensure that threshold doesn't disrupt always inline.
-; RUN: opt < %s -inline-threshold=-2000000001 -always-inline -S | not grep call
-
-
-define internal i32 @if0() alwaysinline {
-  ret i32 1
-}
-
-define i32 @f0() {
-  %r = call i32 @if0()
-  ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll b/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
deleted file mode 100644
index bc9787b..0000000
--- a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: opt < %s -always-inline -S | grep {@foo}
-; Ensure that foo is not removed by always inliner
-; PR 2945
-
-define internal i32 @foo() nounwind {
-  ret i32 0
-}
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
index fb4062f..d04d54e 100644
--- a/test/Transforms/Inline/alloca-bonus.ll
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
 
+target datalayout = "p:32:32"
+
 declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
 
 @glbl = external global i32
@@ -15,8 +17,8 @@ define void @outer1() {
 define void @inner1(i32 *%ptr) {
   %A = load i32* %ptr
   store i32 0, i32* %ptr
-  %C = getelementptr i32* %ptr, i32 0
-  %D = getelementptr i32* %ptr, i32 1
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  %D = getelementptr inbounds i32* %ptr, i32 1
   %E = bitcast i32* %ptr to i8*
   %F = select i1 false, i32* %ptr, i32* @glbl
   call void @llvm.lifetime.start(i64 0, i8* %E)
@@ -35,8 +37,8 @@ define void @outer2() {
 define void @inner2(i32 *%ptr) {
   %A = load i32* %ptr
   store i32 0, i32* %ptr
-  %C = getelementptr i32* %ptr, i32 0
-  %D = getelementptr i32* %ptr, i32 %A
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  %D = getelementptr inbounds i32* %ptr, i32 %A
   %E = bitcast i32* %ptr to i8*
   %F = select i1 false, i32* %ptr, i32* @glbl
   call void @llvm.lifetime.start(i64 0, i8* %E)
@@ -90,12 +92,12 @@ define void @outer4(i32 %A) {
   ret void
 }
 
-; %D poisons this call, scalar-repl can't handle that instruction. However, we
+; %B poisons this call, scalar-repl can't handle that instruction. However, we
 ; still want to detect that the icmp and branch *can* be handled.
 define void @inner4(i32 *%ptr, i32 %A) {
-  %B = getelementptr i32* %ptr, i32 %A
-  %E = icmp eq i32* %ptr, null
-  br i1 %E, label %bb.true, label %bb.false
+  %B = getelementptr inbounds i32* %ptr, i32 %A
+  %C = icmp eq i32* %ptr, null
+  br i1 %C, label %bb.true, label %bb.false
 bb.true:
   ; This block musn't be counted in the inline cost.
   %t1 = load i32* %ptr
@@ -122,3 +124,32 @@ bb.true:
 bb.false:
   ret void
 }
+
+define void @outer5() {
+; CHECK: @outer5
+; CHECK-NOT: call void @inner5
+  %ptr = alloca i32
+  call void @inner5(i1 false, i32* %ptr)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction. However, if
+; the flag is set appropriately, the poisoning instruction is inside of dead
+; code, and so shouldn't be counted.
+define void @inner5(i1 %flag, i32 *%ptr) {
+  %A = load i32* %ptr
+  store i32 0, i32* %ptr
+  %C = getelementptr inbounds i32* %ptr, i32 0
+  br i1 %flag, label %if.then, label %exit
+
+if.then:
+  %D = getelementptr inbounds i32* %ptr, i32 %A
+  %E = bitcast i32* %ptr to i8*
+  %F = select i1 false, i32* %ptr, i32* @glbl
+  call void @llvm.lifetime.start(i64 0, i8* %E)
+  ret void
+
+exit:
+  ret void
+}
+
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
new file mode 100644
index 0000000..e0be41f
--- /dev/null
+++ b/test/Transforms/Inline/always-inline.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+;
+; Ensure the threshold has no impact on these decisions.
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s
+
+define i32 @inner1() alwaysinline {
+  ret i32 1
+}
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret
+
+  %r = call i32 @inner1()
+  ret i32 %r
+}
+
+; The always inliner can't DCE internal functions. PR2945
+; CHECK: @pr2945
+define internal i32 @pr2945() nounwind {
+  ret i32 0
+}
+
+define internal void @inner2(i32 %N) alwaysinline {
+  %P = alloca i32, i32 %N
+  ret void
+}
+define void @outer2(i32 %N) {
+; The always inliner (unlike the normal one) should be willing to inline
+; a function with a dynamic alloca into one without a dynamic alloca.
+; rdar://6655932
+;
+; CHECK: @outer2
+; CHECK-NOT: call void @inner2
+; CHECK: alloca i32, i32 %N
+; CHECK-NOT: call void @inner2
+; CHECK: ret void
+
+  call void @inner2( i32 %N )
+  ret void
+}
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @inner3() alwaysinline {
+entry:
+  %call = call i32 @a() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+define i32 @outer3() {
+entry:
+; CHECK: @outer3
+; CHECK-NOT: call i32 @a
+; CHECK: ret
+
+  %call = call i32 @inner3()
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner4() alwaysinline returns_twice {
+entry:
+  %call = call i32 @b() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @outer4() {
+entry:
+; CHECK: @outer4
+; CHECK: call i32 @b()
+; CHECK: ret
+
+  %call = call i32 @inner4() returns_twice
+  %add = add nsw i32 1, %call
+  ret i32 %add
+}
+
+define i32 @inner5(i8* %addr) alwaysinline {
+entry:
+  indirectbr i8* %addr, [ label %one, label %two ]
+
+one:
+  ret i32 42
+
+two:
+  ret i32 44
+}
+define i32 @outer5(i32 %x) {
+; CHECK: @outer5
+; CHECK: call i32 @inner5
+; CHECK: ret
+
+  %cmp = icmp slt i32 %x, 42
+  %addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two)
+  %call = call i32 @inner5(i8* %addr)
+  ret i32 %call
+}
+
+define void @inner6(i32 %x) alwaysinline {
+entry:
+  %icmp = icmp slt i32 %x, 0
+  br i1 %icmp, label %return, label %bb
+
+bb:
+  %sub = sub nsw i32 %x, 1
+  call void @inner6(i32 %sub)
+  ret void
+
+return:
+  ret void
+}
+define void @outer6() {
+; CHECK: @outer6
+; CHECK: call void @inner6(i32 42)
+; CHECK: ret
+
+entry:
+  call void @inner6(i32 42)
+  ret void
+}
+
diff --git a/test/Transforms/Inline/always_inline_dyn_alloca.ll b/test/Transforms/Inline/always_inline_dyn_alloca.ll
deleted file mode 100644
index 25cfc49..0000000
--- a/test/Transforms/Inline/always_inline_dyn_alloca.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -inline -S | not grep callee
-; rdar://6655932
-
-; If callee is marked alwaysinline, inline it! Even if callee has dynamic
-; alloca and caller does not,
-
-define internal void @callee(i32 %N) alwaysinline {
-  %P = alloca i32, i32 %N
-  ret void
-}
-
-define void @foo(i32 %N) {
-  call void @callee( i32 %N )
-  ret void
-}
diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll
index 0286535..15a5c66 100644
--- a/test/Transforms/Inline/dynamic_alloca_test.ll
+++ b/test/Transforms/Inline/dynamic_alloca_test.ll
@@ -3,33 +3,43 @@
 ; Functions with dynamic allocas can only be inlined into functions that
 ; already have dynamic allocas.
 
-; RUN: opt < %s -inline -S | \
-; RUN:   grep llvm.stacksave
-; RUN: opt < %s -inline -S | not grep callee
-
+; RUN: opt < %s -inline -S | FileCheck %s
+;
+; FIXME: This test is xfailed because the inline cost rewrite disabled *all*
+; inlining of functions which contain a dynamic alloca. It should be re-enabled
+; once that functionality is restored.
+; XFAIL: *
 
 declare void @ext(i32*)
 
 define internal void @callee(i32 %N) {
-  %P = alloca i32, i32 %N ; <i32*> [#uses=1]
-  call void @ext( i32* %P )
-  ret void
+  %P = alloca i32, i32 %N
+  call void @ext(i32* %P)
+  ret void
 }
 
 define void @foo(i32 %N) {
-; <label>:0
-  %P = alloca i32, i32 %N ; <i32*> [#uses=1]
-  call void @ext( i32* %P )
-  br label %Loop
-
-Loop: ; preds = %Loop, %0
-  %count = phi i32 [ 0, %0 ], [ %next, %Loop ] ; <i32> [#uses=2]
-  %next = add i32 %count, 1 ; <i32> [#uses=1]
-  call void @callee( i32 %N )
-  %cond = icmp eq i32 %count, 100000 ; <i1> [#uses=1]
-  br i1 %cond, label %out, label %Loop
-
-out: ; preds = %Loop
-  ret void
+; CHECK: @foo
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call i8* @llvm.stacksave()
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call void @ext
+; CHECK: call void @llvm.stackrestore
+; CHECK: ret
+
+entry:
+  %P = alloca i32, i32 %N
+  call void @ext(i32* %P)
+  br label %loop
+
+loop:
+  %count = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %next = add i32 %count, 1
+  call void @callee(i32 %N)
+  %cond = icmp eq i32 %count, 100000
+  br i1 %cond, label %out, label %loop
+
+out:
+  ret void
 }
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 4c64721..3898aa7 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -1,10 +1,8 @@
 ; Test that the inliner doesn't leave around dead allocas, and that it folds
 ; uncond branches away after it is done specializing.
 
-; RUN: opt < %s -inline -S | \
-; RUN:    not grep {alloca.*uses=0}
-; RUN: opt < %s -inline -S | \
-; RUN:    not grep {br label}
+; RUN: opt < %s -inline -S | FileCheck %s
+
 @A = weak global i32 0 ; <i32*> [#uses=1]
 @B = weak global i32 0 ; <i32*> [#uses=1]
 @C = weak global i32 0 ; <i32*> [#uses=1]
@@ -54,6 +52,18 @@ UnifiedReturnBlock: ; preds = %cond_next13
 
 declare void @ext(i32*)
 
 define void @test() {
+; CHECK: @test
+; CHECK-NOT: ret
+;
+; FIXME: This should be a CHECK-NOT, but currently we have a bug that causes us
+; to not nuke unused allocas.
+; CHECK: alloca
+; CHECK-NOT: ret
+;
+; No branches should survive the inliner's cleanup.
+; CHECK-NOT: br
+; CHECK: ret void
+
 entry:
   tail call fastcc void @foo( i32 1 )
   tail call fastcc void @foo( i32 2 )
@@ -61,3 +71,143 @@ entry:
   tail call fastcc void @foo( i32 8 )
   ret void
 }
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) {
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %then1, label %end1
+
+then1:
+  call void @f(i32 %x)
+  br label %end1
+
+end1:
+  %x2 = and i32 %x, %z
+  %cmp2 = icmp sgt i32 %x2, 1
+  br i1 %cmp2, label %then2, label %end2
+
+then2:
+  call void @f(i32 %x2)
+  br label %end2
+
+end2:
+  %y2 = or i32 %y, %z
+  %cmp3 = icmp sgt i32 %y2, 0
+  br i1 %cmp3, label %then3, label %end3
+
+then3:
+  call void @f(i32 %y2)
+  br label %end3
+
+end3:
+  br i1 %b, label %end3.1, label %end3.2
+
+end3.1:
+  %x3.1 = or i32 %x, 10
+  br label %end3.3
+
+end3.2:
+  %x3.2 = or i32 %x, 10
+  br label %end3.3
+
+end3.3:
+  %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ]
+  %cmp4 = icmp slt i32 %x3.3, 1
+  br i1 %cmp4, label %then4, label %end4
+
+then4:
+  call void @f(i32 %x3.3)
+  br label %end4
+
+end4:
+  ret void
+}
+
+define void @outer2(i32 %z, i1 %b) {
+; Ensure that after inlining, none of the blocks with a call to @f actually
+; make it through inlining.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+  call void @inner2(i32 0, i32 -1, i32 %z, i1 %b)
+  ret void
+}
+
+define void @PR12470_inner(i16 signext %p1) nounwind uwtable {
+entry:
+  br i1 undef, label %cond.true, label %cond.false
+
+cond.true:
+  br label %cond.end
+
+cond.false:
+  %conv = sext i16 %p1 to i32
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ undef, %cond.true ], [ 0, %cond.false ]
+  %tobool = icmp eq i32 %cond, 0
+  br i1 %tobool, label %if.end5, label %if.then
+
+if.then:
+  ret void
+
+if.end5:
+  ret void
+}
+
+define void @PR12470_outer() {
+; This previously crashed during inliner cleanup and folding inner return
+; instructions. Check that we don't crash and we produce a function with a single
+; return instruction due to merging the returns of the inlined function.
+; CHECK: define void @PR12470_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret void
+; CHECK: }
+
+entry:
+  call void @PR12470_inner(i16 signext 1)
+  ret void
+}
+
+define void @crasher_inner() nounwind uwtable {
+entry:
+  br i1 false, label %for.end28, label %for.body6
+
+for.body6:
+  br i1 undef, label %for.body6, label %for.cond12.for.inc26_crit_edge
+
+for.cond12.for.inc26_crit_edge:
+  br label %for.body6.1
+
+for.end28:
+  ret void
+
+for.body6.1:
+  br i1 undef, label %for.body6.1, label %for.cond12.for.inc26_crit_edge.1
+
+for.cond12.for.inc26_crit_edge.1:
+  br label %for.body6.2
+
+for.body6.2:
+  br i1 undef, label %for.body6.2, label %for.cond12.for.inc26_crit_edge.2
+
+for.cond12.for.inc26_crit_edge.2:
+  br label %for.end28
+}
+
+define void @crasher_outer() {
+; CHECK: @crasher_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret
+; CHECK: }
+entry:
+  tail call void @crasher_inner()
+  ret void
+}
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index cc7aaac..dc35b60 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | FileCheck %s
+; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
 
 define internal i32 @callee1(i32 %A, i32 %B) {
   %C = sdiv i32 %A, %B
@@ -14,17 +14,18 @@ define i32 @caller1() {
 }
 
 define i32 @caller2() {
+; Check that we can constant-prop through instructions after inlining callee21
+; to get constants in the inlined callsite to callee22.
+; FIXME: Currently, the threshold is fixed at 20 because we don't perform
+; *recursive* cost analysis to realize that the nested call site will definitely
+; inline and be cheap. We should eventually do that and lower the threshold here
+; to 1.
+;
 ; CHECK: @caller2
 ; CHECK-NOT: call void @callee2
 ; CHECK: ret
 
-; We contrive to make this hard for *just* the inline pass to do in order to
-; simulate what can actually happen with large, complex functions getting
-; inlined.
-  %a = add i32 42, 0
-  %b = add i32 48, 0
-
-  %x = call i32 @callee21(i32 %a, i32 %b)
+  %x = call i32 @callee21(i32 42, i32 48)
   ret i32 %x
 }
 
@@ -41,49 +42,71 @@ define i32 @callee22(i32 %x) {
   br i1 %icmp, label %bb.true, label %bb.false
 bb.true:
   ; This block musn't be counted in the inline cost.
-  %ptr = call i8* @getptr()
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
-  load volatile i8* %ptr
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  %x4 = add i32 %x3, 1
+  %x5 = add i32 %x4, 1
+  %x6 = add i32 %x5, 1
+  %x7 = add i32 %x6, 1
+  %x8 = add i32 %x7, 1
-  ret i32 %x
+
+  ret i32 %x8
 
 bb.false:
   ret i32 %x
 }
+
+define i32 @caller3() {
+; Check that even if the expensive path is hidden behind several basic blocks,
+; it doesn't count toward the inline cost when constant-prop proves those paths
+; dead.
+;
+; CHECK: @caller3
+; CHECK-NOT: call
+; CHECK: ret i32 6
+
+entry:
+  %x = call i32 @callee3(i32 42, i32 48)
+  ret i32 %x
+}
+
+define i32 @callee3(i32 %x, i32 %y) {
+  %sub = sub i32 %y, %x
+  %icmp = icmp ugt i32 %sub, 42
+  br i1 %icmp, label %bb.true, label %bb.false
+
+bb.true:
+  %icmp2 = icmp ult i32 %sub, 64
+  br i1 %icmp2, label %bb.true.true, label %bb.true.false
+
+bb.true.true:
+  ; This block mustn't be counted in the inline cost.
+  %x1 = add i32 %x, 1
+  %x2 = add i32 %x1, 1
+  %x3 = add i32 %x2, 1
+  %x4 = add i32 %x3, 1
+  %x5 = add i32 %x4, 1
+  %x6 = add i32 %x5, 1
+  %x7 = add i32 %x6, 1
+  %x8 = add i32 %x7, 1
+  br label %bb.merge
+
+bb.true.false:
+  ; This block mustn't be counted in the inline cost.
+  %y1 = add i32 %y, 1
+  %y2 = add i32 %y1, 1
+  %y3 = add i32 %y2, 1
+  %y4 = add i32 %y3, 1
+  %y5 = add i32 %y4, 1
+  %y6 = add i32 %y5, 1
+  %y7 = add i32 %y6, 1
+  %y8 = add i32 %y7, 1
+  br label %bb.merge
+
+bb.merge:
+  %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ]
+  ret i32 %result
+
+bb.false:
+  ret i32 %sub
+}
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index d56b390..6cde0e2 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -71,3 +71,40 @@ entry:
   call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
   ret void
 }
+
+
+; Check that a recursive function, when called with a constant that makes the
+; recursive path dead code can actually be inlined.
+define i32 @fib(i32 %i) { +entry: + %is.zero = icmp eq i32 %i, 0 + br i1 %is.zero, label %zero.then, label %zero.else + +zero.then: + ret i32 0 + +zero.else: + %is.one = icmp eq i32 %i, 1 + br i1 %is.one, label %one.then, label %one.else + +one.then: + ret i32 1 + +one.else: + %i1 = sub i32 %i, 1 + %f1 = call i32 @fib(i32 %i1) + %i2 = sub i32 %i, 2 + %f2 = call i32 @fib(i32 %i2) + %f = add i32 %f1, %f2 + ret i32 %f +} + +define i32 @fib_caller() { +; CHECK: @fib_caller +; CHECK-NOT: call +; CHECK: ret + %f1 = call i32 @fib(i32 0) + %f2 = call i32 @fib(i32 1) + %result = add i32 %f1, %f2 + ret i32 %result +} diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll index 0b431d6..60fc3e2 100644 --- a/test/Transforms/Inline/ptr-diff.ll +++ b/test/Transforms/Inline/ptr-diff.ll @@ -1,5 +1,7 @@ ; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s +target datalayout = "p:32:32" + define i32 @outer1() { ; CHECK: @outer1 ; CHECK-NOT: call diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll index e4d1367..ef7185c 100644 --- a/test/Transforms/InstCombine/alloca.ll +++ b/test/Transforms/InstCombine/alloca.ll @@ -44,3 +44,47 @@ define i32* @test4(i32 %n) { %A = alloca i32, i32 %n ret i32* %A } + +; Allocas which are only used by GEPs, bitcasts, and stores (transitively) +; should be deleted. +define void @test5() { +; CHECK: @test5 +; CHECK-NOT: alloca +; CHECK-NOT: store +; CHECK: ret + +entry: + %a = alloca { i32 } + %b = alloca i32* + %a.1 = getelementptr { i32 }* %a, i32 0, i32 0 + store i32 123, i32* %a.1 + store i32* %a.1, i32** %b + %b.1 = bitcast i32** %b to i32* + store i32 123, i32* %b.1 + %a.2 = getelementptr { i32 }* %a, i32 0, i32 0 + store atomic i32 2, i32* %a.2 unordered, align 4 + %a.3 = getelementptr { i32 }* %a, i32 0, i32 0 + store atomic i32 3, i32* %a.3 release, align 4 + %a.4 = getelementptr { i32 }* %a, i32 0, i32 0 + store atomic i32 4, i32* %a.4 seq_cst, align 4 + ret void +} + +declare void @f(i32* %p) + +; Check that we don't delete allocas in some erroneous cases. +define void @test6() { +; CHECK: @test6 +; CHECK-NOT: ret +; CHECK: alloca +; CHECK-NEXT: alloca +; CHECK: ret + +entry: + %a = alloca { i32 } + %b = alloca i32 + %a.1 = getelementptr { i32 }* %a, i32 0, i32 0 + store volatile i32 123, i32* %a.1 + tail call void @f(i32* %b) + ret void +} diff --git a/test/Transforms/InstCombine/apint-shift.ll b/test/Transforms/InstCombine/apint-shift.ll index 55243a6..0ea73a0 100644 --- a/test/Transforms/InstCombine/apint-shift.ll +++ b/test/Transforms/InstCombine/apint-shift.ll @@ -1,70 +1,93 @@ -; This test makes sure that shit instructions are properly eliminated +; This test makes sure that shift instructions are properly eliminated ; even with arbitrary precision integers. -; RUN: opt < %s -instcombine -S | not grep sh -; END. 
+; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: @test1 +; CHECK-NOT: sh define i47 @test1(i47 %A) { %B = shl i47 %A, 0 ; <i47> [#uses=1] ret i47 %B } +; CHECK: @test2 +; CHECK-NOT: sh define i41 @test2(i7 %X) { %A = zext i7 %X to i41 ; <i41> [#uses=1] %B = shl i41 0, %A ; <i41> [#uses=1] ret i41 %B } +; CHECK: @test3 +; CHECK-NOT: sh define i41 @test3(i41 %A) { %B = ashr i41 %A, 0 ; <i41> [#uses=1] ret i41 %B } +; CHECK: @test4 +; CHECK-NOT: sh define i39 @test4(i7 %X) { %A = zext i7 %X to i39 ; <i39> [#uses=1] %B = ashr i39 0, %A ; <i39> [#uses=1] ret i39 %B } +; CHECK: @test5 +; CHECK-NOT: sh define i55 @test5(i55 %A) { %B = lshr i55 %A, 55 ; <i55> [#uses=1] ret i55 %B } +; CHECK: @test5a +; CHECK-NOT: sh define i32 @test5a(i32 %A) { %B = shl i32 %A, 32 ; <i32> [#uses=1] ret i32 %B } +; CHECK: @test6 +; CHECK-NOT: sh define i55 @test6(i55 %A) { %B = shl i55 %A, 1 ; <i55> [#uses=1] %C = mul i55 %B, 3 ; <i55> [#uses=1] ret i55 %C } +; CHECK: @test7 +; CHECK-NOT: sh define i29 @test7(i8 %X) { %A = zext i8 %X to i29 ; <i29> [#uses=1] %B = ashr i29 -1, %A ; <i29> [#uses=1] ret i29 %B } +; CHECK: @test8 +; CHECK-NOT: sh define i7 @test8(i7 %A) { %B = shl i7 %A, 4 ; <i7> [#uses=1] %C = shl i7 %B, 3 ; <i7> [#uses=1] ret i7 %C } +; CHECK: @test9 +; CHECK-NOT: sh define i17 @test9(i17 %A) { %B = shl i17 %A, 16 ; <i17> [#uses=1] %C = lshr i17 %B, 16 ; <i17> [#uses=1] ret i17 %C } +; CHECK: @test10 +; CHECK-NOT: sh define i19 @test10(i19 %A) { %B = lshr i19 %A, 18 ; <i19> [#uses=1] %C = shl i19 %B, 18 ; <i19> [#uses=1] ret i19 %C } +; CHECK: @test11 +; CHECK-NOT: sh define i23 @test11(i23 %A) { %a = mul i23 %A, 3 ; <i23> [#uses=1] %B = lshr i23 %a, 11 ; <i23> [#uses=1] @@ -72,12 +95,16 @@ define i23 @test11(i23 %A) { ret i23 %C } +; CHECK: @test12 +; CHECK-NOT: sh define i47 @test12(i47 %A) { %B = ashr i47 %A, 8 ; <i47> [#uses=1] %C = shl i47 %B, 8 ; <i47> [#uses=1] ret i47 %C } +; CHECK: @test13 +; CHECK-NOT: sh define i18 @test13(i18 %A) { %a = mul i18 %A, 3 ; <i18> [#uses=1] %B = ashr i18 %a, 8 ; <i18> [#uses=1] @@ -85,6 +112,8 @@ define i18 @test13(i18 %A) { ret i18 %C } +; CHECK: @test14 +; CHECK-NOT: sh define i35 @test14(i35 %A) { %B = lshr i35 %A, 4 ; <i35> [#uses=1] %C = or i35 %B, 1234 ; <i35> [#uses=1] @@ -92,6 +121,8 @@ define i35 @test14(i35 %A) { ret i35 %D } +; CHECK: @test14a +; CHECK-NOT: sh define i79 @test14a(i79 %A) { %B = shl i79 %A, 4 ; <i79> [#uses=1] %C = and i79 %B, 1234 ; <i79> [#uses=1] @@ -99,12 +130,16 @@ define i79 @test14a(i79 %A) { ret i79 %D } +; CHECK: @test15 +; CHECK-NOT: sh define i45 @test15(i1 %C) { %A = select i1 %C, i45 3, i45 1 ; <i45> [#uses=1] %V = shl i45 %A, 2 ; <i45> [#uses=1] ret i45 %V } +; CHECK: @test15a +; CHECK-NOT: sh define i53 @test15a(i1 %X) { %A = select i1 %X, i8 3, i8 1 ; <i8> [#uses=1] %B = zext i8 %A to i53 ; <i53> [#uses=1] @@ -112,6 +147,8 @@ define i53 @test15a(i1 %X) { ret i53 %V } +; CHECK: @test16 +; CHECK-NOT: sh define i1 @test16(i84 %X) { %tmp.3 = ashr i84 %X, 4 ; <i84> [#uses=1] %tmp.6 = and i84 %tmp.3, 1 ; <i84> [#uses=1] @@ -119,48 +156,64 @@ define i1 @test16(i84 %X) { ret i1 %tmp.7 } +; CHECK: @test17 +; CHECK-NOT: sh define i1 @test17(i106 %A) { %B = lshr i106 %A, 3 ; <i106> [#uses=1] %C = icmp eq i106 %B, 1234 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test18 +; CHECK-NOT: sh define i1 @test18(i11 %A) { %B = lshr i11 %A, 10 ; <i11> [#uses=1] %C = icmp eq i11 %B, 123 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test19 +; CHECK-NOT: sh define i1 @test19(i37 %A) { %B = ashr i37 %A, 2 ; <i37> [#uses=1] %C = icmp eq i37 %B, 0 
; <i1> [#uses=1] ret i1 %C } +; CHECK: @test19a +; CHECK-NOT: sh define i1 @test19a(i39 %A) { %B = ashr i39 %A, 2 ; <i39> [#uses=1] %C = icmp eq i39 %B, -1 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test20 +; CHECK-NOT: sh define i1 @test20(i13 %A) { %B = ashr i13 %A, 12 ; <i13> [#uses=1] %C = icmp eq i13 %B, 123 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test21 +; CHECK-NOT: sh define i1 @test21(i12 %A) { %B = shl i12 %A, 6 ; <i12> [#uses=1] %C = icmp eq i12 %B, -128 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test22 +; CHECK-NOT: sh define i1 @test22(i14 %A) { %B = shl i14 %A, 7 ; <i14> [#uses=1] %C = icmp eq i14 %B, 0 ; <i1> [#uses=1] ret i1 %C } +; CHECK: @test23 +; CHECK-NOT: sh define i11 @test23(i44 %A) { %B = shl i44 %A, 33 ; <i44> [#uses=1] %C = ashr i44 %B, 33 ; <i44> [#uses=1] @@ -168,6 +221,8 @@ define i11 @test23(i44 %A) { ret i11 %D } +; CHECK: @test25 +; CHECK-NOT: sh define i37 @test25(i37 %tmp.2, i37 %AA) { %x = lshr i37 %AA, 17 ; <i37> [#uses=1] %tmp.3 = lshr i37 %tmp.2, 17 ; <i37> [#uses=1] @@ -176,6 +231,8 @@ define i37 @test25(i37 %tmp.2, i37 %AA) { ret i37 %tmp.6 } +; CHECK: @test26 +; CHECK-NOT: sh define i40 @test26(i40 %A) { %B = lshr i40 %A, 1 ; <i40> [#uses=1] %C = bitcast i40 %B to i40 ; <i40> [#uses=1] diff --git a/test/Transforms/InstCombine/apint-shl-trunc.ll b/test/Transforms/InstCombine/apint-shl-trunc.ll index 8163e6d..f2dc7d5 100644 --- a/test/Transforms/InstCombine/apint-shl-trunc.ll +++ b/test/Transforms/InstCombine/apint-shl-trunc.ll @@ -1,13 +1,24 @@ -; RUN: opt < %s -instcombine -S | grep shl -; END. +; RUN: opt < %s -instcombine -S | FileCheck %s define i1 @test0(i39 %X, i39 %A) { +; CHECK: @test0 +; CHECK: %[[V1:.*]] = shl i39 1, %A +; CHECK: %[[V2:.*]] = and i39 %[[V1]], %X +; CHECK: %[[V3:.*]] = icmp ne i39 %[[V2]], 0 +; CHECK: ret i1 %[[V3]] + %B = lshr i39 %X, %A %D = trunc i39 %B to i1 ret i1 %D } define i1 @test1(i799 %X, i799 %A) { +; CHECK: @test1 +; CHECK: %[[V1:.*]] = shl i799 1, %A +; CHECK: %[[V2:.*]] = and i799 %[[V1]], %X +; CHECK: %[[V3:.*]] = icmp ne i799 %[[V2]], 0 +; CHECK: ret i1 %[[V3]] + %B = lshr i799 %X, %A %D = trunc i799 %B to i1 ret i1 %D diff --git a/test/Transforms/InstCombine/pr12251.ll b/test/Transforms/InstCombine/pr12251.ll new file mode 100644 index 0000000..74a41eb --- /dev/null +++ b/test/Transforms/InstCombine/pr12251.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +define zeroext i1 @_Z3fooPb(i8* nocapture %x) { +entry: + %a = load i8* %x, align 1, !range !0 + %b = and i8 %a, 1 + %tobool = icmp ne i8 %b, 0 + ret i1 %tobool +} + +; CHECK: %a = load i8* %x, align 1, !range !0 +; CHECK-NEXT: %tobool = icmp ne i8 %a, 0 +; CHECK-NEXT: ret i1 %tobool + +!0 = metadata !{i8 0, i8 2} diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index e15bfaa..ced74bd 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -103,6 +103,68 @@ define i1 @gep8(%gept* %x) { ; CHECK: ret i1 %equal } +define i1 @gep9(i8* %ptr) { +; CHECK: @gep9 +; CHECK-NOT: ret +; CHECK: ret i1 true + +entry: + %first1 = getelementptr inbounds i8* %ptr, i32 0 + %first2 = getelementptr inbounds i8* %first1, i32 1 + %first3 = getelementptr inbounds i8* %first2, i32 2 + %first4 = getelementptr inbounds i8* %first3, i32 4 + %last1 = getelementptr inbounds i8* %first2, i32 48 + %last2 = getelementptr inbounds i8* %last1, i32 8 + %last3 = getelementptr inbounds i8* %last2, i32 -4 + %last4 = getelementptr inbounds i8* %last3, i32 -4 + %first.int = ptrtoint i8* 
%first4 to i32 + %last.int = ptrtoint i8* %last4 to i32 + %cmp = icmp ne i32 %last.int, %first.int + ret i1 %cmp +} + +define i1 @gep10(i8* %ptr) { +; CHECK: @gep10 +; CHECK-NOT: ret +; CHECK: ret i1 true + +entry: + %first1 = getelementptr inbounds i8* %ptr, i32 -2 + %first2 = getelementptr inbounds i8* %first1, i32 44 + %last1 = getelementptr inbounds i8* %ptr, i32 48 + %last2 = getelementptr inbounds i8* %last1, i32 -6 + %first.int = ptrtoint i8* %first2 to i32 + %last.int = ptrtoint i8* %last2 to i32 + %cmp = icmp eq i32 %last.int, %first.int + ret i1 %cmp +} + +define i1 @gep11(i8* %ptr) { +; CHECK: @gep11 +; CHECK-NOT: ret +; CHECK: ret i1 true + +entry: + %first1 = getelementptr inbounds i8* %ptr, i32 -2 + %last1 = getelementptr inbounds i8* %ptr, i32 48 + %last2 = getelementptr inbounds i8* %last1, i32 -6 + %cmp = icmp ult i8* %first1, %last2 + ret i1 %cmp +} + +define i1 @gep12(i8* %ptr) { +; CHECK: @gep12 +; CHECK-NOT: ret +; CHECK: ret i1 %cmp + +entry: + %first1 = getelementptr inbounds i8* %ptr, i32 -2 + %last1 = getelementptr inbounds i8* %ptr, i32 48 + %last2 = getelementptr inbounds i8* %last1, i32 -6 + %cmp = icmp slt i8* %first1, %last2 + ret i1 %cmp +} + define i1 @zext(i32 %x) { ; CHECK: @zext %e1 = zext i32 %x to i64 diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll index 013964c..1eb1fd4 100644 --- a/test/Transforms/InstSimplify/ptr_diff.ll +++ b/test/Transforms/InstSimplify/ptr_diff.ll @@ -6,8 +6,8 @@ define i64 @ptrdiff1(i8* %ptr) { ; CHECK: @ptrdiff1 ; CHECK-NEXT: ret i64 42 - %first = getelementptr i8* %ptr, i32 0 - %last = getelementptr i8* %ptr, i32 42 + %first = getelementptr inbounds i8* %ptr, i32 0 + %last = getelementptr inbounds i8* %ptr, i32 42 %first.int = ptrtoint i8* %first to i64 %last.int = ptrtoint i8* %last to i64 %diff = sub i64 %last.int, %first.int @@ -18,16 +18,31 @@ define i64 @ptrdiff2(i8* %ptr) { ; CHECK: @ptrdiff2 ; CHECK-NEXT: ret i64 42 - %first1 = getelementptr i8* %ptr, i32 0 - %first2 = getelementptr i8* %first1, i32 1 - %first3 = getelementptr i8* %first2, i32 2 - %first4 = getelementptr i8* %first3, i32 4 - %last1 = getelementptr i8* %first2, i32 48 - %last2 = getelementptr i8* %last1, i32 8 - %last3 = getelementptr i8* %last2, i32 -4 - %last4 = getelementptr i8* %last3, i32 -4 + %first1 = getelementptr inbounds i8* %ptr, i32 0 + %first2 = getelementptr inbounds i8* %first1, i32 1 + %first3 = getelementptr inbounds i8* %first2, i32 2 + %first4 = getelementptr inbounds i8* %first3, i32 4 + %last1 = getelementptr inbounds i8* %first2, i32 48 + %last2 = getelementptr inbounds i8* %last1, i32 8 + %last3 = getelementptr inbounds i8* %last2, i32 -4 + %last4 = getelementptr inbounds i8* %last3, i32 -4 %first.int = ptrtoint i8* %first4 to i64 %last.int = ptrtoint i8* %last4 to i64 %diff = sub i64 %last.int, %first.int ret i64 %diff } + +define i64 @ptrdiff3(i8* %ptr) { +; Don't bother with non-inbounds GEPs. 
+; CHECK: @ptrdiff3 +; CHECK: getelementptr +; CHECK: sub +; CHECK: ret + + %first = getelementptr i8* %ptr, i32 0 + %last = getelementptr i8* %ptr, i32 42 + %first.int = ptrtoint i8* %first to i64 + %last.int = ptrtoint i8* %last to i64 + %diff = sub i64 %last.int, %first.int + ret i64 %diff +} diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll index 9287178..b32ee82 100644 --- a/test/Transforms/LoopRotate/dbgvalue.ll +++ b/test/Transforms/LoopRotate/dbgvalue.ll @@ -1,11 +1,13 @@ ; RUN: opt -S -loop-rotate %s | FileCheck %s -; CHECK: entry -; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x} - declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp { +; CHECK: define i32 @tak +; CHECK: entry +; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x} + entry: br label %tailrecurse @@ -35,7 +37,45 @@ return: ; preds = %if.end ret i32 %z.tr, !dbg !17 } -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone +@channelColumns = external global i64 +@horzPlane = external global i8*, align 8 + +define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) { +; Ensure that the loop increment basic block is rotated into the tail of the +; body, even though it contains a debug intrinsic call. +; CHECK: define void @FindFreeHorzSeg +; CHECK: %dec = add +; CHECK-NEXT: tail call void @llvm.dbg.value +; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end + +entry: + br label %for.cond + +for.cond: + %i.0 = phi i64 [ %startCol, %entry ], [ %dec, %for.inc ] + %cmp = icmp eq i64 %i.0, 0 + br i1 %cmp, label %for.end, label %for.body + +for.body: + %0 = load i64* @channelColumns, align 8 + %mul = mul i64 %0, %row + %add = add i64 %mul, %i.0 + %1 = load i8** @horzPlane, align 8 + %arrayidx = getelementptr inbounds i8* %1, i64 %add + %2 = load i8* %arrayidx, align 1 + %tobool = icmp eq i8 %2, 0 + br i1 %tobool, label %for.inc, label %for.end + +for.inc: + %dec = add i64 %i.0, -1 + tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata undef) + br label %for.cond + +for.end: + %add1 = add i64 %i.0, 1 + store i64 %add1, i64* %rowStart, align 8 + ret void +} !llvm.dbg.sp = !{!0} diff --git a/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll new file mode 100644 index 0000000..c9b11a9 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -loop-reduce -S +; PR11950: isHighCostExpansion crashes on ConstExpr +; +; The crash happened during IVChain analysis (CollectChains). We don't +; really care how LSR decides to transform this loop, so we don't +; check it. As long as the analysis doesn't crash we're ok. 
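A note on the IR shape involved: a ConstantExpr is a constant that encodes an operation (here a getelementptr) and sits directly in an instruction's operand list, with no defining instruction for the analysis to walk, which is why expansion-cost analysis has to handle it specially. A minimal sketch of that shape, with made-up names (@table, @take):

@table = global [8 x i32] zeroinitializer

declare void @take(i32*)

define void @constexpr_operand() {
entry:
  ; the argument is a ConstantExpr GEP, not an instruction
  call void @take(i32* getelementptr inbounds ([8 x i32]* @table, i32 0, i32 3))
  ret void
}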
+target datalayout = "e-p:64:64:64-n32:64" + +%struct.this_structure_s.0.5 = type { [6144 x [8 x i32]], [6144 x [8 x i32]], [6147 x [4 x i32]], [8 x i32], [2 x i8*], [2 x i8*], [6144 x i8], [6144 x i32], [6144 x i32], [4 x [4 x i8]] } + +define internal fastcc void @someFunction(%struct.this_structure_s.0.5* nocapture %scratch, i32 %stage, i32 %cbSize) nounwind { +entry: + %0 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 4, i32 %stage + %1 = load i8** %0, align 4 + %2 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 5, i32 %stage + %3 = load i8** %2, align 4 + %4 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 2, i32 0, i32 0 + %tmp11 = shl i32 %stage, 1 + %tmp1325 = or i32 %tmp11, 1 + br label %__label_D_1608 + +__label_D_1608: ; preds = %__label_D_1608, %entry + %i.12 = phi i32 [ 0, %entry ], [ %10, %__label_D_1608 ] + %tmp = shl i32 %i.12, 2 + %lvar_g.13 = getelementptr i32* %4, i32 %tmp + %tmp626 = or i32 %tmp, 1 + %scevgep = getelementptr i32* %4, i32 %tmp626 + %tmp727 = or i32 %tmp, 2 + %scevgep8 = getelementptr i32* %4, i32 %tmp727 + %tmp928 = or i32 %tmp, 3 + %scevgep10 = getelementptr i32* %4, i32 %tmp928 + %scevgep12 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp11, i32 %i.12 + %scevgep14 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp1325, i32 %i.12 + %5 = load i8* %scevgep12, align 1 + %6 = sext i8 %5 to i32 + %7 = load i8* %scevgep14, align 1 + %8 = sext i8 %7 to i32 + store i32 0, i32* %lvar_g.13, align 4 + store i32 %8, i32* %scevgep, align 4 + store i32 %6, i32* %scevgep8, align 4 + %9 = add nsw i32 %8, %6 + store i32 %9, i32* %scevgep10, align 4 + %10 = add nsw i32 %i.12, 1 + %exitcond = icmp eq i32 %10, 3 + br i1 %exitcond, label %return, label %__label_D_1608 + +return: ; preds = %__label_D_1608 + ret void +} diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg index d622529..bac2ffa 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg @@ -1,13 +1,6 @@ config.suffixes = ['.ll'] -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: config.unsupported = True diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index 2dcaab8..ed32ca8 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -61,7 +61,7 @@ exit: ; preds = %cond.true29.i, %cond.true.i ; CHECK: @test2 ; CHECK: %entry ; CHECK-NOT: mov -; CHECK: jne +; CHECK: je define void @test2(i32 %n) nounwind uwtable { entry: br i1 undef, label %while.end, label %for.cond468 diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg index 84bd88c..da2db5a 100644 --- a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg +++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg @@ -1,13 +1,6 @@ config.suffixes = ['.ll'] -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = 
set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/LoopStrengthReduce/addrec-gep.ll index b62d093..3e4e369 100644 --- a/test/Transforms/IndVarSimplify/addrec-gep.ll +++ b/test/Transforms/LoopStrengthReduce/addrec-gep.ll @@ -1,13 +1,17 @@ -; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s -; CHECK: getelementptr -; CHECK: mul {{.*}}, 37 -; CHECK: add {{.*}}, 5203 +; RUN: opt < %s -loop-reduce -S | FileCheck %s +; CHECK: bb1: +; CHECK: load double* [[IV:%[^,]+]] +; CHECK: store double {{.*}}, double* [[IV]] +; CHECK: getelementptr double* ; CHECK-NOT: cast +; CHECK: br {{.*}} label %bb1 ; This test tests several things. The load and store should use the ; same address instead of having it computed twice, and SCEVExpander should ; be able to reconstruct the full getelementptr, despite it having a few ; obstacles set in its way. +; We only check that the inner loop (bb1-bb2) is "reduced" because LSR +; currently only operates on inner loops. target datalayout = "e-p:64:64:64-n32:64" diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll index 251d34e..f90d030 100644 --- a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll +++ b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s +; RUN: opt < %s -loop-reduce -S | FileCheck %s ; CHECK-NOT: {{inttoptr|ptrtoint}} ; CHECK: scevgep ; CHECK-NOT: {{inttoptr|ptrtoint}} diff --git a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll index 59551d5..a43a4ff 100644 --- a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll +++ b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll @@ -1,4 +1,4 @@ -; RUN: opt -S < %s -loop-unroll -unroll-count=4 -enable-iv-rewrite=false | FileCheck %s +; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s ; ; Test induction variable simplify after loop unrolling. It should ; expose nice opportunities for GVN. diff --git a/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll new file mode 100644 index 0000000..8946a23 --- /dev/null +++ b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s +; PR12513: Loop unrolling breaks with indirect branches. +; If loop unrolling attempts to transform this loop, it replaces the +; indirectbr successors. SimplifyCFG then considers them to be unreachable. 
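Roughly what goes wrong: unrolling clones the loop body and rewrites branch successors in the clones, but blockaddress constants keep naming the original blocks, so the cloned indirectbr and its blockaddress operands fall out of sync. A minimal sketch of the problematic construct, with illustrative names (@pick stands in for the test's @funca):

declare i8* @pick(i8*, i8*)

define void @indirectbr_loop() {
entry:
  br label %loop

loop:
  ; cloning %loop cannot retarget these blockaddress constants,
  ; which is why unrolling must leave such loops alone
  %t = call i8* @pick(i8* blockaddress(@indirectbr_loop, %loop), i8* blockaddress(@indirectbr_loop, %exit))
  indirectbr i8* %t, [label %loop, label %exit]

exit:
  ret void
}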
+declare void @subtract() nounwind uwtable + +; CHECK-NOT: unreachable +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable { +entry: + %vals19 = alloca [5 x i32], align 16 + %x20 = alloca i32, align 4 + store i32 135, i32* %x20, align 4 + br label %for.body + +for.body: ; preds = ; %call2_termjoin, %call3_termjoin + %indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ] + %a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8* +blockaddress(@main, %for.body_codeprime)) nounwind + indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime] + +for.body_code: ; preds = %for.body + call void @subtract() + br label %call2_termjoin + +call2_termjoin: ; preds = %for.body_codeprime, %for.body_code + %joinphi15.in.in = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %joinphi15.in.in, 5 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %call2_termjoin + ret i32 0 + +for.body_codeprime: ; preds = %for.body + call void @subtract_v2(i64 %indvars.iv) + br label %call2_termjoin +} + +declare coldcc i8* @funca(i8*, i8*) readonly + +declare void @subtract_v2(i64) nounwind uwtable diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll new file mode 100644 index 0000000..3179d55 --- /dev/null +++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s +; Loop size = 3. When the function has the optsize attribute, the +; OptSizeUnrollThreshold (50) is used instead, so the loop should be unrolled +; by a factor of 16, since 3 * 16 = 48 < 50. +define void @unroll_opt_for_size() nounwind optsize { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp diff --git a/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll new file mode 100644 index 0000000..c92f0a2 --- /dev/null +++ b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -S -loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s +; PR12343: -loop-unswitch crash on indirect branch + +; CHECK: %0 = icmp eq i64 undef, 0 +; CHECK-NEXT: br i1 %0, label %"5", label %"4" + +; CHECK: "5": ; preds = %entry +; CHECK-NEXT: br label %"16" + +; CHECK: "16": ; preds = %"22", %"5" +; CHECK-NEXT: indirectbr i8* undef, [label %"22", label %"33"] + +; CHECK: "22": ; preds = %"16" +; CHECK-NEXT: br i1 %0, label %"16", label %"26" + +; CHECK: "26": ; preds = %"22" +; CHECK-NEXT: unreachable + +define void @foo() { +entry: + %0 = icmp eq i64 undef, 0 + br i1 %0, label %"5", label %"4" + +"4": ; preds = %entry + unreachable + +"5": ; preds = %entry + br label %"16" + +"16": ; preds = %"22", %"5" + indirectbr i8* undef, [label %"22", label %"33"] + +"22": ; preds = %"16" + br i1 %0, label %"16", label %"26" + +"26": ; preds = %"22" + unreachable + +"33": ; preds = %"16" + unreachable +} diff --git a/test/Transforms/ObjCARC/basic.ll index
08bd8c0..ba2f778 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -3,10 +3,12 @@ target datalayout = "e-p:64:64:64" declare i8* @objc_retain(i8*) +declare i8* @objc_retainAutoreleasedReturnValue(i8*) declare void @objc_release(i8*) declare i8* @objc_autorelease(i8*) +declare i8* @objc_autoreleaseReturnValue(i8*) declare void @objc_autoreleasePoolPop(i8*) -declare void @objc_autoreleasePoolPush() +declare i8* @objc_autoreleasePoolPush() declare i8* @objc_retainBlock(i8*) declare i8* @objc_retainedObject(i8*) @@ -526,7 +528,7 @@ entry: define void @test13d(i8* %x, i64 %n) { entry: call i8* @objc_retain(i8* %x) nounwind - call void @objc_autoreleasePoolPush() + call i8* @objc_autoreleasePoolPush() call i8* @objc_retain(i8* %x) nounwind call void @use_pointer(i8* %x) call void @use_pointer(i8* %x) @@ -1400,7 +1402,7 @@ entry: ; CHECK-NEXT: call i8* @objc_autorelease(i8* %p) ; CHECK-NEXT: call void @use_pointer(i8* %p) ; CHECK-NEXT: call void @use_pointer(i8* %p) -; CHECK-NEXT: call void @objc_autoreleasePoolPush() +; CHECK-NEXT: call i8* @objc_autoreleasePoolPush() ; CHECK-NEXT: ret void ; CHECK-NEXT: } define void @test43b(i8* %p) { @@ -1410,7 +1412,7 @@ entry: call i8* @objc_retain(i8* %p) call void @use_pointer(i8* %p) call void @use_pointer(i8* %p) - call void @objc_autoreleasePoolPush() + call i8* @objc_autoreleasePoolPush() call void @objc_release(i8* %p) ret void } @@ -1797,6 +1799,78 @@ exit: ret void } +; Move an autorelease past a phi with a null. + +; CHECK: define i8* @test65( +; CHECK: if.then: +; CHECK: call i8* @objc_autorelease( +; CHECK: return: +; CHECK-NOT: @objc_autorelease +; CHECK: } +define i8* @test65(i1 %x) { +entry: + br i1 %x, label %return, label %if.then + +if.then: ; preds = %entry + %c = call i8* @returner() + %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind + br label %return + +return: ; preds = %if.then, %entry + %retval = phi i8* [ %s, %if.then ], [ null, %entry ] + %q = call i8* @objc_autorelease(i8* %retval) nounwind + ret i8* %retval +} + +; Don't move an autorelease past an autorelease pool boundary. + +; CHECK: define i8* @test65b( +; CHECK: if.then: +; CHECK-NOT: @objc_autorelease +; CHECK: return: +; CHECK: call i8* @objc_autorelease( +; CHECK: } +define i8* @test65b(i1 %x) { +entry: + %t = call i8* @objc_autoreleasePoolPush() + br i1 %x, label %return, label %if.then + +if.then: ; preds = %entry + %c = call i8* @returner() + %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind + br label %return + +return: ; preds = %if.then, %entry + %retval = phi i8* [ %s, %if.then ], [ null, %entry ] + call void @objc_autoreleasePoolPop(i8* %t) + %q = call i8* @objc_autorelease(i8* %retval) nounwind + ret i8* %retval +} + +; Don't move an autoreleaseReturnValue, which would break +; the RV optimization. + +; CHECK: define i8* @test65c( +; CHECK: if.then: +; CHECK-NOT: @objc_autorelease +; CHECK: return: +; CHECK: call i8* @objc_autoreleaseReturnValue( +; CHECK: } +define i8* @test65c(i1 %x) { +entry: + br i1 %x, label %return, label %if.then + +if.then: ; preds = %entry + %c = call i8* @returner() + %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind + br label %return + +return: ; preds = %if.then, %entry + %retval = phi i8* [ %s, %if.then ], [ null, %entry ] + %q = call i8* @objc_autoreleaseReturnValue(i8* %retval) nounwind + ret i8* %retval +} + declare void @bar(i32 ()*) ; A few real-world testcases.
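The @test65 transformation above hinges on one fact: autoreleasing a null pointer does nothing. A condensed before/after sketch of the same pattern (the @make helper is illustrative):

declare i8* @objc_autorelease(i8*)
declare i8* @make()

; before: the autorelease applies to a phi that merges %s with null
define i8* @before(i1 %x) {
entry:
  br i1 %x, label %return, label %if.then

if.then:
  %s = call i8* @make()
  br label %return

return:
  %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
  %q = call i8* @objc_autorelease(i8* %retval)
  ret i8* %retval
}

; after: since autorelease of null is a no-op, the call moves into the
; arm that produces the non-null value
define i8* @after(i1 %x) {
entry:
  br i1 %x, label %return, label %if.then

if.then:
  %s = call i8* @make()
  %q = call i8* @objc_autorelease(i8* %s)
  br label %return

return:
  %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
  ret i8* %retval
}

As test65b and test65c show, the movement is blocked by an intervening autoreleasePoolPop and is never applied to autoreleaseReturnValue, whose position is significant.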
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll index 04ae3ca..c48f8a5 100644 --- a/test/Transforms/ObjCARC/contract.ll +++ b/test/Transforms/ObjCARC/contract.ll @@ -143,3 +143,21 @@ define i8* @test7(i8* %p) { %2 = tail call i8* @objc_autoreleaseReturnValue(i8* %p) ret i8* %p } + +; Do the return value substitution for PHI nodes too. + +; CHECK: define i8* @test8( +; CHECK: %retval = phi i8* [ %p, %if.then ], [ null, %entry ] +; CHECK: } +define i8* @test8(i1 %x, i8* %c) { +entry: + br i1 %x, label %return, label %if.then + +if.then: ; preds = %entry + %p = call i8* @objc_retain(i8* %c) nounwind + br label %return + +return: ; preds = %if.then, %entry + %retval = phi i8* [ %c, %if.then ], [ null, %entry ] + ret i8* %retval +} diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll new file mode 100644 index 0000000..3f694cf --- /dev/null +++ b/test/Transforms/ObjCARC/escape.ll @@ -0,0 +1,131 @@ +; RUN: opt -objc-arc -S < %s | FileCheck %s +; rdar://11229925 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%struct.__block_byref_weakLogNTimes = type { i8*, %struct.__block_byref_weakLogNTimes*, i32, i32, i8*, i8*, void (...)* } +%struct.__block_descriptor = type { i64, i64 } + +; Don't optimize away the retainBlock, because the object's address "escapes" +; with the objc_storeWeak call. + +; CHECK: define void @test0( +; CHECK: %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 +; CHECK: call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 +; CHECK: } +define void @test0() nounwind { +entry: + %weakLogNTimes = alloca %struct.__block_byref_weakLogNTimes, align 8 + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 + %byref.isa = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 0 + store i8* null, i8** %byref.isa, align 8 + %byref.forwarding = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 1 + store %struct.__block_byref_weakLogNTimes* %weakLogNTimes, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %byref.flags = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 2 + store i32 33554432, i32* %byref.flags, align 8 + %byref.size = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 3 + store i32 48, i32* %byref.size, align 4 + %tmp1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 4 + store i8* bitcast (void (i8*, i8*)* @__Block_byref_object_copy_ to i8*), i8** %tmp1, align 8 + %tmp2 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 5 + store i8* bitcast (void (i8*)* @__Block_byref_object_dispose_ to i8*), i8** %tmp2, align 8 + %weakLogNTimes1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 6 + %tmp3 = bitcast void (...)** %weakLogNTimes1 to i8** + %tmp4 = call i8* @objc_initWeak(i8** %tmp3, i8* null) nounwind + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 + store i8* null, i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + 
%block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*, i32)* @__main_block_invoke_0 to i8*), i8** %block.invoke, align 8 + %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_descriptor* null, %struct.__block_descriptor** %block.descriptor, align 8 + %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 + %tmp5 = bitcast %struct.__block_byref_weakLogNTimes* %weakLogNTimes to i8* + store i8* %tmp5, i8** %block.captured, align 8 + %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* + %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 + %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6 + %tmp9 = bitcast void (...)** %weakLogNTimes3 to i8** + %tmp10 = call i8* @objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind + %tmp11 = getelementptr inbounds i8* %tmp7, i64 16 + %tmp12 = bitcast i8* %tmp11 to i8** + %tmp13 = load i8** %tmp12, align 8 + %tmp14 = bitcast i8* %tmp13 to void (i8*, i32)* + call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0 + call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 + call void @_Block_object_dispose(i8* %tmp5, i32 8) nounwind + call void @objc_destroyWeak(i8** %tmp3) nounwind + ret void +} + +; Like test0, but it makes a regular call instead of a storeWeak call, +; so the optimization is valid. 
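Distilled, the rule is: a retainBlock marked !clang.arc.copy_on_escape is removable as long as the block pointer is not published the way objc_storeWeak publishes it; merely passing the pointer as a call argument, as test1 below does, is not treated as an escape. A minimal sketch of the removable shape (@use_block is illustrative):

declare i8* @objc_retainBlock(i8*)
declare void @objc_release(i8*)
declare void @use_block(i8*)

define void @no_escape(i8* %blk) {
entry:
  ; %b is only ever a call argument, so the pair below can be removed
  %b = call i8* @objc_retainBlock(i8* %blk) nounwind, !clang.arc.copy_on_escape !0
  call void @use_block(i8* %b)
  call void @objc_release(i8* %b) nounwind, !clang.imprecise_release !0
  ret void
}

!0 = metadata !{}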
+ +; CHECK: define void @test1( +; CHECK-NOT: @objc_retainBlock +; CHECK: } +define void @test1() nounwind { +entry: + %weakLogNTimes = alloca %struct.__block_byref_weakLogNTimes, align 8 + %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8 + %byref.isa = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 0 + store i8* null, i8** %byref.isa, align 8 + %byref.forwarding = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 1 + store %struct.__block_byref_weakLogNTimes* %weakLogNTimes, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %byref.flags = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 2 + store i32 33554432, i32* %byref.flags, align 8 + %byref.size = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 3 + store i32 48, i32* %byref.size, align 4 + %tmp1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 4 + store i8* bitcast (void (i8*, i8*)* @__Block_byref_object_copy_ to i8*), i8** %tmp1, align 8 + %tmp2 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 5 + store i8* bitcast (void (i8*)* @__Block_byref_object_dispose_ to i8*), i8** %tmp2, align 8 + %weakLogNTimes1 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %weakLogNTimes, i64 0, i32 6 + %tmp3 = bitcast void (...)** %weakLogNTimes1 to i8** + %tmp4 = call i8* @objc_initWeak(i8** %tmp3, i8* null) nounwind + %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0 + store i8* null, i8** %block.isa, align 8 + %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1 + store i32 1107296256, i32* %block.flags, align 8 + %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2 + store i32 0, i32* %block.reserved, align 4 + %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3 + store i8* bitcast (void (i8*, i32)* @__main_block_invoke_0 to i8*), i8** %block.invoke, align 8 + %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4 + store %struct.__block_descriptor* null, %struct.__block_descriptor** %block.descriptor, align 8 + %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5 + %tmp5 = bitcast %struct.__block_byref_weakLogNTimes* %weakLogNTimes to i8* + store i8* %tmp5, i8** %block.captured, align 8 + %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8* + %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0 + %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8 + %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6 + %tmp9 = bitcast void (...)** %weakLogNTimes3 to i8** + %tmp10 = call i8* @not_really_objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind + %tmp11 = getelementptr inbounds i8* %tmp7, i64 16 + %tmp12 = bitcast i8* %tmp11 to i8** + %tmp13 = load i8** %tmp12, align 8 + %tmp14 = bitcast i8* %tmp13 to void (i8*, i32)* + call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0 + call void 
@objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0 + call void @_Block_object_dispose(i8* %tmp5, i32 8) nounwind + call void @objc_destroyWeak(i8** %tmp3) nounwind + ret void +} + +declare void @__Block_byref_object_copy_(i8*, i8*) nounwind +declare void @__Block_byref_object_dispose_(i8*) nounwind +declare void @objc_destroyWeak(i8**) +declare i8* @objc_initWeak(i8**, i8*) +declare void @__main_block_invoke_0(i8* nocapture, i32) nounwind ssp +declare void @_Block_object_dispose(i8*, i32) +declare i8* @objc_retainBlock(i8*) +declare i8* @objc_storeWeak(i8**, i8*) +declare i8* @not_really_objc_storeWeak(i8**, i8*) +declare void @objc_release(i8*) + +!0 = metadata !{} diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll index 9e26209..76e82a5 100644 --- a/test/Transforms/ObjCARC/invoke.ll +++ b/test/Transforms/ObjCARC/invoke.ll @@ -6,6 +6,7 @@ declare i8* @objc_retainAutoreleasedReturnValue(i8*) declare i8* @objc_msgSend(i8*, i8*, ...) declare void @use_pointer(i8*) declare void @callee() +declare i8* @returner() ; ARCOpt shouldn't try to move the releases to the block containing the invoke. @@ -103,6 +104,114 @@ finally.rethrow: ; preds = %invoke.cont, %entry unreachable } +; Don't try to place code on invoke critical edges. + +; CHECK: define void @test3( +; CHECK: if.end: +; CHECK-NEXT: call void @objc_release(i8* %p) nounwind +; CHECK-NEXT: ret void +define void @test3(i8* %p, i1 %b) { +entry: + %0 = call i8* @objc_retain(i8* %p) + call void @callee() + br i1 %b, label %if.else, label %if.then + +if.then: + invoke void @use_pointer(i8* %p) + to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 + +if.else: + invoke void @use_pointer(i8* %p) + to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 + +lpad: + %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + cleanup + ret void + +if.end: + call void @objc_release(i8* %p) + ret void +} + +; Like test3, but with ARC-relevant exception handling. + +; CHECK: define void @test4( +; CHECK: lpad: +; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @objc_release(i8* %p) nounwind +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: call void @objc_release(i8* %p) nounwind +; CHECK-NEXT: ret void +define void @test4(i8* %p, i1 %b) { +entry: + %0 = call i8* @objc_retain(i8* %p) + call void @callee() + br i1 %b, label %if.else, label %if.then + +if.then: + invoke void @use_pointer(i8* %p) + to label %if.end unwind label %lpad + +if.else: + invoke void @use_pointer(i8* %p) + to label %if.end unwind label %lpad + +lpad: + %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + cleanup + call void @objc_release(i8* %p) + ret void + +if.end: + call void @objc_release(i8* %p) + ret void +} + +; Don't turn the retainAutoreleasedReturnValue into retain, because it's +; for an invoke which we can assume codegen will put immediately prior.
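Roughly, the return-value optimization is a handshake: objc_retainAutoreleasedReturnValue only gets its cheap semantics when it executes immediately after the call or invoke producing the value, and for an invoke that position is the front of the normal destination block, which is where test5 below keeps it. A straight-line sketch of the required adjacency:

declare i8* @returner()
declare i8* @objc_retainAutoreleasedReturnValue(i8*)

define i8* @handshake() {
entry:
  %r = call i8* @returner()
  ; must directly follow the producing call; with intervening code
  ; (as in test6 below) only a plain objc_retain is safe
  %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %r) nounwind
  ret i8* %s
}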
+ +; CHECK: define void @test5( +; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %z) +; CHECK: } +define void @test5() { +entry: + %z = invoke i8* @returner() + to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 + +lpad: + %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + cleanup + ret void + +if.end: + call i8* @objc_retainAutoreleasedReturnValue(i8* %z) + ret void +} + +; Like test5, but there's intervening code. + +; CHECK: define void @test6( +; CHECK: call i8* @objc_retain(i8* %z) +; CHECK: } +define void @test6() { +entry: + %z = invoke i8* @returner() + to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0 + +lpad: + %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) + cleanup + ret void + +if.end: + call void @callee() + call i8* @objc_retainAutoreleasedReturnValue(i8* %z) + ret void +} + declare i32 @__gxx_personality_v0(...) declare i32 @__objc_personality_v0(...) diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll index 30610f8..1faae5f 100644 --- a/test/Transforms/ObjCARC/pr12270.ll +++ b/test/Transforms/ObjCARC/pr12270.ll @@ -9,7 +9,13 @@ entry: return: ; No predecessors! %bar = bitcast %2* %x to i8* %foo = call i8* @objc_autoreleaseReturnValue(i8* %bar) nounwind + call void @callee() + call void @use_pointer(i8* %foo) + call void @objc_release(i8* %foo) nounwind ret void } declare i8* @objc_autoreleaseReturnValue(i8*) +declare void @objc_release(i8*) +declare void @callee() +declare void @use_pointer(i8*) diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll index 41bde01..f876e51 100644 --- a/test/Transforms/ObjCARC/retain-not-declared.ll +++ b/test/Transforms/ObjCARC/retain-not-declared.ll @@ -30,7 +30,7 @@ entry: ; CHECK: @test1( ; CHECK: @objc_retain( -; CHECK: @objc_retain( +; CHECK: @objc_retainAutoreleasedReturnValue( ; CHECK: @objc_release( ; CHECK: @objc_release( ; CHECK: } diff --git a/test/Transforms/PhaseOrdering/PR6627.ll b/test/Transforms/PhaseOrdering/PR6627.ll new file mode 100644 index 0000000..ef9947f --- /dev/null +++ b/test/Transforms/PhaseOrdering/PR6627.ll @@ -0,0 +1,93 @@ +; RUN: opt -O3 -S %s | FileCheck %s +; XFAIL: * + +declare i32 @doo(...) + +; PR6627 - This whole nasty sequence should be flattened down to a single +; 32-bit comparison. 
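For reference, the magic constant in the CHECK lines falls straight out of the byte values being compared: 127, 69, 76 and 70 are '\7F', 'E', 'L', 'F' (the ELF magic), and a single little-endian i32 load of those four bytes gives (70 << 24) + (76 << 16) + (69 << 8) + 127 = 0x464C457F = 1179403647, which is exactly the value the flattened compare is checked against.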
+define void @test2(i8* %arrayidx) nounwind ssp { +entry: + %xx = bitcast i8* %arrayidx to i32* + %x1 = load i32* %xx, align 4 + %tmp = trunc i32 %x1 to i8 + %conv = zext i8 %tmp to i32 + %cmp = icmp eq i32 %conv, 127 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1 + %tmp5 = load i8* %arrayidx4, align 1 + %conv6 = zext i8 %tmp5 to i32 + %cmp7 = icmp eq i32 %conv6, 69 + br i1 %cmp7, label %land.lhs.true9, label %if.end + +land.lhs.true9: ; preds = %land.lhs.true + %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2 + %tmp13 = load i8* %arrayidx12, align 1 + %conv14 = zext i8 %tmp13 to i32 + %cmp15 = icmp eq i32 %conv14, 76 + br i1 %cmp15, label %land.lhs.true17, label %if.end + +land.lhs.true17: ; preds = %land.lhs.true9 + %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3 + %tmp21 = load i8* %arrayidx20, align 1 + %conv22 = zext i8 %tmp21 to i32 + %cmp23 = icmp eq i32 %conv22, 70 + br i1 %cmp23, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true17 + %call25 = call i32 (...)* @doo() + br label %if.end + +if.end: + ret void + +; CHECK: @test2 +; CHECK: %x1 = load i32* %xx, align 4 +; CHECK-NEXT: icmp eq i32 %x1, 1179403647 +; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end +} + +; PR6627 - This should all be flattened down to one compare. This is the same +; as test2, except that the initial load is done as an i8 instead of i32, thus +; requiring widening. +define void @test2a(i8* %arrayidx) nounwind ssp { +entry: + %x1 = load i8* %arrayidx, align 4 + %conv = zext i8 %x1 to i32 + %cmp = icmp eq i32 %conv, 127 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1 + %tmp5 = load i8* %arrayidx4, align 1 + %conv6 = zext i8 %tmp5 to i32 + %cmp7 = icmp eq i32 %conv6, 69 + br i1 %cmp7, label %land.lhs.true9, label %if.end + +land.lhs.true9: ; preds = %land.lhs.true + %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2 + %tmp13 = load i8* %arrayidx12, align 1 + %conv14 = zext i8 %tmp13 to i32 + %cmp15 = icmp eq i32 %conv14, 76 + br i1 %cmp15, label %land.lhs.true17, label %if.end + +land.lhs.true17: ; preds = %land.lhs.true9 + %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3 + %tmp21 = load i8* %arrayidx20, align 1 + %conv22 = zext i8 %tmp21 to i32 + %cmp23 = icmp eq i32 %conv22, 70 + br i1 %cmp23, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true17 + %call25 = call i32 (...)* @doo() + br label %if.end + +if.end: + ret void + +; CHECK: @test2a +; CHECK: %x1 = load i32* {{.*}}, align 4 +; CHECK-NEXT: icmp eq i32 %x1, 1179403647 +; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end +} diff --git a/test/Transforms/PhaseOrdering/basic.ll b/test/Transforms/PhaseOrdering/basic.ll index e5b2ba4..2d52ae5 100644 --- a/test/Transforms/PhaseOrdering/basic.ll +++ b/test/Transforms/PhaseOrdering/basic.ll @@ -1,5 +1,4 @@ ; RUN: opt -O3 -S %s | FileCheck %s -; XFAIL: * target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.6.7" @@ -23,96 +22,3 @@ define void @test1() nounwind ssp { ; CHECK: @test1 ; CHECK-NEXT: ret void } - - -; PR6627 - This whole nasty sequence should be flattened down to a single -; 32-bit comparison. 
-define void @test2(i8* %arrayidx) nounwind ssp { -entry: - %xx = bitcast i8* %arrayidx to i32* - %x1 = load i32* %xx, align 4 - %tmp = trunc i32 %x1 to i8 - %conv = zext i8 %tmp to i32 - %cmp = icmp eq i32 %conv, 127 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1 - %tmp5 = load i8* %arrayidx4, align 1 - %conv6 = zext i8 %tmp5 to i32 - %cmp7 = icmp eq i32 %conv6, 69 - br i1 %cmp7, label %land.lhs.true9, label %if.end - -land.lhs.true9: ; preds = %land.lhs.true - %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2 - %tmp13 = load i8* %arrayidx12, align 1 - %conv14 = zext i8 %tmp13 to i32 - %cmp15 = icmp eq i32 %conv14, 76 - br i1 %cmp15, label %land.lhs.true17, label %if.end - -land.lhs.true17: ; preds = %land.lhs.true9 - %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3 - %tmp21 = load i8* %arrayidx20, align 1 - %conv22 = zext i8 %tmp21 to i32 - %cmp23 = icmp eq i32 %conv22, 70 - br i1 %cmp23, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true17 - %call25 = call i32 (...)* @doo() - br label %if.end - -if.end: - ret void - -; CHECK: @test2 -; CHECK: %x1 = load i32* %xx, align 4 -; CHECK-NEXT: icmp eq i32 %x1, 1179403647 -; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end -} - -declare i32 @doo(...) - -; PR6627 - This should all be flattened down to one compare. This is the same -; as test2, except that the initial load is done as an i8 instead of i32, thus -; requiring widening. -define void @test2a(i8* %arrayidx) nounwind ssp { -entry: - %x1 = load i8* %arrayidx, align 4 - %conv = zext i8 %x1 to i32 - %cmp = icmp eq i32 %conv, 127 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %arrayidx4 = getelementptr inbounds i8* %arrayidx, i64 1 - %tmp5 = load i8* %arrayidx4, align 1 - %conv6 = zext i8 %tmp5 to i32 - %cmp7 = icmp eq i32 %conv6, 69 - br i1 %cmp7, label %land.lhs.true9, label %if.end - -land.lhs.true9: ; preds = %land.lhs.true - %arrayidx12 = getelementptr inbounds i8* %arrayidx, i64 2 - %tmp13 = load i8* %arrayidx12, align 1 - %conv14 = zext i8 %tmp13 to i32 - %cmp15 = icmp eq i32 %conv14, 76 - br i1 %cmp15, label %land.lhs.true17, label %if.end - -land.lhs.true17: ; preds = %land.lhs.true9 - %arrayidx20 = getelementptr inbounds i8* %arrayidx, i64 3 - %tmp21 = load i8* %arrayidx20, align 1 - %conv22 = zext i8 %tmp21 to i32 - %cmp23 = icmp eq i32 %conv22, 70 - br i1 %cmp23, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true17 - %call25 = call i32 (...)* @doo() - br label %if.end - -if.end: - ret void - -; CHECK: @test2a -; CHECK: %x1 = load i32* {{.*}}, align 4 -; CHECK-NEXT: icmp eq i32 %x1, 1179403647 -; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end -} - diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll index 8780e32..03dcdf5 100644 --- a/test/Transforms/SimplifyLibCalls/floor.ll +++ b/test/Transforms/SimplifyLibCalls/floor.ll @@ -1,16 +1,31 @@ -; RUN: opt < %s -simplify-libcalls -S > %t -; RUN: not grep {call.*floor(} %t -; RUN: grep {call.*floorf(} %t -; RUN: not grep {call.*ceil(} %t -; RUN: grep {call.*ceilf(} %t -; RUN: not grep {call.*nearbyint(} %t -; RUN: grep {call.*nearbyintf(} %t -; XFAIL: sparc +; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s +; RUN: opt < %s 
-simplify-libcalls -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s +; RUN: opt < %s -simplify-libcalls -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s + +; DO-SIMPLIFY: call float @floorf( +; DO-SIMPLIFY: call float @ceilf( +; DO-SIMPLIFY: call float @roundf( +; DO-SIMPLIFY: call float @nearbyintf( + +; C89-SIMPLIFY: call float @floorf( +; C89-SIMPLIFY: call float @ceilf( +; C89-SIMPLIFY: call double @round( +; C89-SIMPLIFY: call double @nearbyint( + +; DONT-SIMPLIFY: call double @floor( +; DONT-SIMPLIFY: call double @ceil( +; DONT-SIMPLIFY: call double @round( +; DONT-SIMPLIFY: call double @nearbyint( declare double @floor(double) declare double @ceil(double) +declare double @round(double) + declare double @nearbyint(double) define float @test_floor(float %C) { @@ -29,8 +44,14 @@ define float @test_ceil(float %C) { ret float %F } -; PR8466 -; XFAIL: win32 +define float @test_round(float %C) { + %D = fpext float %C to double ; <double> [#uses=1] + ; --> roundf + %E = call double @round( double %D ) ; <double> [#uses=1] + %F = fptrunc double %E to float ; <float> [#uses=1] + ret float %F +} + define float @test_nearbyint(float %C) { %D = fpext float %C to double ; <double> [#uses=1] ; --> nearbyintf diff --git a/test/Transforms/SimplifyLibCalls/win-math.ll b/test/Transforms/SimplifyLibCalls/win-math.ll new file mode 100644 index 0000000..367e5b8 --- /dev/null +++ b/test/Transforms/SimplifyLibCalls/win-math.ll @@ -0,0 +1,274 @@ +; RUN: opt -O2 -S -mtriple=i386-pc-win32 < %s | FileCheck %s -check-prefix=WIN32 +; RUN: opt -O2 -S -mtriple=x86_64-pc-win32 < %s | FileCheck %s -check-prefix=WIN64 +; RUN: opt -O2 -S -mtriple=i386-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW32 +; RUN: opt -O2 -S -mtriple=x86_64-pc-mingw32 < %s | FileCheck %s -check-prefix=MINGW64 + +; x86 win32 msvcrt does not provide entry points for single-precision libm. +; x86-64 win32 msvcrt does (except for fabsf). +; msvcrt does not provide C99 math, but mingw32 does.
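Every function below probes the same shrinking pattern: single-precision math gets modeled as promote to double, call the double-precision routine, truncate back, and the simplifier may collapse that to the float entry point only when the target's C library is known to provide one. Schematically, with floor as the example (the @promoted/@shrunk names are illustrative):

declare double @floor(double)
declare float @floorf(float)

; before: float value routed through the double-precision routine
define float @promoted(float %x) {
  %d = fpext float %x to double
  %e = call double @floor(double %d)
  %f = fptrunc double %e to float
  ret float %f
}

; after, only on targets whose runtime supplies floorf:
define float @shrunk(float %x) {
  %f = call float @floorf(float %x)
  ret float %f
}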
+ +declare double @acos(double %x) +define float @float_acos(float %x) nounwind readnone { +; WIN32: @float_acos +; WIN32-NOT: float @acosf +; WIN32: double @acos + %1 = fpext float %x to double + %2 = call double @acos(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @asin(double %x) +define float @float_asin(float %x) nounwind readnone { +; WIN32: @float_asin +; WIN32-NOT: float @asinf +; WIN32: double @asin + %1 = fpext float %x to double + %2 = call double @asin(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @atan(double %x) +define float @float_atan(float %x) nounwind readnone { +; WIN32: @float_atan +; WIN32-NOT: float @atanf +; WIN32: double @atan + %1 = fpext float %x to double + %2 = call double @atan(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @atan2(double %x, double %y) +define float @float_atan2(float %x, float %y) nounwind readnone { +; WIN32: @float_atan2 +; WIN32-NOT: float @atan2f +; WIN32: double @atan2 + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @atan2(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @ceil(double %x) +define float @float_ceil(float %x) nounwind readnone { +; WIN32: @float_ceil +; WIN32-NOT: float @ceilf +; WIN32: double @ceil +; WIN64: @float_ceil +; WIN64: float @ceilf +; WIN64-NOT: double @ceil +; MINGW32: @float_ceil +; MINGW32: float @ceilf +; MINGW32-NOT: double @ceil +; MINGW64: @float_ceil +; MINGW64: float @ceilf +; MINGW64-NOT: double @ceil + %1 = fpext float %x to double + %2 = call double @ceil(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @_copysign(double %x, double %y) +define float @float_copysign(float %x, float %y) nounwind readnone { +; WIN32: @float_copysign +; WIN32-NOT: float @copysignf +; WIN32-NOT: float @_copysignf +; WIN32: double @_copysign + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @_copysign(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @cos(double %x) +define float @float_cos(float %x) nounwind readnone { +; WIN32: @float_cos +; WIN32-NOT: float @cosf +; WIN32: double @cos + %1 = fpext float %x to double + %2 = call double @cos(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @cosh(double %x) +define float @float_cosh(float %x) nounwind readnone { +; WIN32: @float_cosh +; WIN32-NOT: float @coshf +; WIN32: double @cosh + %1 = fpext float %x to double + %2 = call double @cosh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @exp(double %x) +define float @float_exp(float %x) nounwind readnone { +; WIN32: @float_exp +; WIN32-NOT: float @expf +; WIN32: double @exp + %1 = fpext float %x to double + %2 = call double @exp(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @fabs(double %x) +define float @float_fabs(float %x) nounwind readnone { +; WIN32: @float_fabs +; WIN32-NOT: float @fabsf +; WIN32: double @fabs +; WIN64: @float_fabs +; WIN64-NOT: float @fabsf +; WIN64: double @fabs + %1 = fpext float %x to double + %2 = call double @fabs(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @floor(double %x) +define float @float_floor(float %x) nounwind readnone { +; WIN32: @float_floor +; WIN32-NOT: float @floorf +; WIN32: double @floor +; WIN64:
@float_floor +; WIN64: float @floorf +; WIN64-NOT: double @floor +; MINGW32: @float_floor +; MINGW32: float @floorf +; MINGW32-NOT: double @floor +; MINGW64: @float_floor +; MINGW64: float @floorf +; MINGW64-NOT: double @floor + %1 = fpext float %x to double + %2 = call double @floor(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @fmod(double %x, double %y) +define float @float_fmod(float %x, float %y) nounwind readnone { +; WIN32: @float_fmod +; WIN32-NOT: float @fmodf +; WIN32: double @fmod + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @fmod(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @log(double %x) +define float @float_log(float %x) nounwind readnone { +; WIN32: @float_log +; WIN32-NOT: float @logf +; WIN32: double @log + %1 = fpext float %x to double + %2 = call double @log(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @pow(double %x, double %y) +define float @float_pow(float %x, float %y) nounwind readnone { +; WIN32: @float_pow +; WIN32-NOT: float @powf +; WIN32: double @pow + %1 = fpext float %x to double + %2 = fpext float %y to double + %3 = call double @pow(double %1, double %2) + %4 = fptrunc double %3 to float + ret float %4 +} + +declare double @sin(double %x) +define float @float_sin(float %x) nounwind readnone { +; WIN32: @float_sin +; WIN32-NOT: float @sinf +; WIN32: double @sin + %1 = fpext float %x to double + %2 = call double @sin(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @sinh(double %x) +define float @float_sinh(float %x) nounwind readnone { +; WIN32: @float_sinh +; WIN32-NOT: float @sinhf +; WIN32: double @sinh + %1 = fpext float %x to double + %2 = call double @sinh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @sqrt(double %x) +define float @float_sqrt(float %x) nounwind readnone { +; WIN32: @float_sqrt +; WIN32-NOT: float @sqrtf +; WIN32: double @sqrt +; WIN64: @float_sqrt +; WIN64: float @sqrtf +; WIN64-NOT: double @sqrt +; MINGW32: @float_sqrt +; MINGW32: float @sqrtf +; MINGW32-NOT: double @sqrt +; MINGW64: @float_sqrt +; MINGW64: float @sqrtf +; MINGW64-NOT: double @sqrt + %1 = fpext float %x to double + %2 = call double @sqrt(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @tan(double %x) +define float @float_tan(float %x) nounwind readnone { +; WIN32: @float_tan +; WIN32-NOT: float @tanf +; WIN32: double @tan + %1 = fpext float %x to double + %2 = call double @tan(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +declare double @tanh(double %x) +define float @float_tanh(float %x) nounwind readnone { +; WIN32: @float_tanh +; WIN32-NOT: float @tanhf +; WIN32: double @tanh + %1 = fpext float %x to double + %2 = call double @tanh(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + +; win32 does not have round; mingw32 does +declare double @round(double %x) +define float @float_round(float %x) nounwind readnone { +; WIN32: @float_round +; WIN32-NOT: float @roundf +; WIN32: double @round +; WIN64: @float_round +; WIN64-NOT: float @roundf +; WIN64: double @round +; MINGW32: @float_round +; MINGW32: float @roundf +; MINGW32-NOT: double @round +; MINGW64: @float_round +; MINGW64: float @roundf +; MINGW64-NOT: double @round + %1 = fpext float %x to double + %2 = call double @round(double %1) + %3 = fptrunc double %2 to float + ret float %3 +} + diff --git 
a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg index 84bd88c..da2db5a 100644 --- a/test/Transforms/TailDup/X86/lit.local.cfg +++ b/test/Transforms/TailDup/X86/lit.local.cfg @@ -1,13 +1,6 @@ config.suffixes = ['.ll'] -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg index 39c8039..18c604a 100644 --- a/test/Transforms/TailDup/lit.local.cfg +++ b/test/Transforms/TailDup/lit.local.cfg @@ -1,12 +1,5 @@ config.suffixes = ['.ll', '.c', '.cpp'] -def getRoot(config): - if not config.parent: - return config - return getRoot(config.parent) - -root = getRoot(config) - -targets = set(root.targets_to_build.split()) +targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: config.unsupported = True