diff options
Diffstat (limited to 'test/Transforms/LoopUnroll')
21 files changed, 284 insertions, 96 deletions
diff --git a/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll b/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll index d4c8402..95e9dde 100644 --- a/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll +++ b/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll @@ -9,11 +9,11 @@ target triple = "i686-apple-darwin8" define void @Foo(%struct.__mpz_struct* %base) { entry: %want = alloca [1 x %struct.__mpz_struct], align 16 ; <[1 x %struct.__mpz_struct]*> [#uses=4] - %want1 = getelementptr [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] + %want1 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] call void @__gmpz_init( %struct.__mpz_struct* %want1 ) - %want27 = getelementptr [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] - %want3 = getelementptr [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] - %want2 = getelementptr [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=2] + %want27 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] + %want3 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=1] + %want2 = getelementptr [1 x %struct.__mpz_struct], [1 x %struct.__mpz_struct]* %want, i32 0, i32 0 ; <%struct.__mpz_struct*> [#uses=2] br label %bb bb: ; preds = %bb, %entry diff --git a/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll index bf6d6d5..a87b16a 100644 --- a/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll +++ b/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll @@ -46,8 +46,8 @@ bb.nph: ; preds = %entry bb: ; preds = %bb.nph, %bb1 %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2] %s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ] ; <i32> [#uses=1] - %scevgep = getelementptr i32* %p, i64 %indvar ; <i32*> [#uses=1] - %1 = load i32* %scevgep, align 1 ; <i32> [#uses=1] + %scevgep = getelementptr i32, i32* %p, i64 %indvar ; <i32*> [#uses=1] + %1 = load i32, i32* %scevgep, align 1 ; <i32> [#uses=1] %2 = add nsw i32 %1, %s.01 ; <i32> [#uses=2] br label %bb1 @@ -84,7 +84,7 @@ do.body: ; preds = %do.cond, %if.end br i1 %cond2, label %exit, label %do.cond exit: ; preds = %do.body - %tmp7.i = load i32* undef, align 8 + %tmp7.i = load i32, i32* undef, align 8 br i1 undef, label %do.cond, label %land.lhs.true land.lhs.true: ; preds = %exit diff --git a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll index a43a4ff..0b48409 100644 --- a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll +++ b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll @@ -12,11 +12,11 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK: while.body: ; CHECK-NOT: while.body.1: ; CHECK: %shr.1 = lshr i32 %bit_addr.addr.01, 5 -; CHECK: %arrayidx.1 = getelementptr inbounds i32* %bitmap, i32 %shr.1 +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1 ; CHECK: %shr.2 = lshr i32 %bit_addr.addr.01, 5 -; CHECK: %arrayidx.2 = getelementptr inbounds i32* %bitmap, i32 %shr.2 +; CHECK: %arrayidx.2 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.2 ; CHECK: %shr.3 = lshr i32 %bit_addr.addr.01, 5 -; CHECK: %arrayidx.3 = getelementptr inbounds i32* %bitmap, i32 %shr.3 +; CHECK: %arrayidx.3 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.3 define void @FlipBit(i32* nocapture %bitmap, i32 %bit_addr, i32 %nbits) nounwind { entry: br label %while.body @@ -28,8 +28,8 @@ while.body: %shr = lshr i32 %bit_addr.addr.01, 5 %rem = and i32 %bit_addr.addr.01, 31 %shl = shl i32 1, %rem - %arrayidx = getelementptr inbounds i32* %bitmap, i32 %shr - %tmp6 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr + %tmp6 = load i32, i32* %arrayidx, align 4 %xor = xor i32 %tmp6, %shl store i32 %xor, i32* %arrayidx, align 4 %inc = add i32 %bit_addr.addr.01, 1 diff --git a/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll b/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll index 617d4db..5f9eec7 100644 --- a/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll +++ b/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll @@ -21,8 +21,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i8* %arr, i64 %indvars.iv - %0 = load i8* %arrayidx, align 1 + %arrayidx = getelementptr inbounds i8, i8* %arr, i64 %indvars.iv + %0 = load i8, i8* %arrayidx, align 1 %conv = sext i8 %0 to i32 %add = add nsw i32 %conv, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 diff --git a/test/Transforms/LoopUnroll/AArch64/lit.local.cfg b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg new file mode 100644 index 0000000..cec29af --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True + diff --git a/test/Transforms/LoopUnroll/AArch64/partial.ll b/test/Transforms/LoopUnroll/AArch64/partial.ll new file mode 100644 index 0000000..8a1ea80 --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/partial.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s + +; Partial unroll 8 times for this loop. +define void @unroll1() nounwind { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %inc, %loop ] + %inc = add i32 %iv, 1 + %exitcnd = icmp uge i32 %inc, 1024 + br i1 %exitcnd, label %exit, label %loop + +exit: + ret void +} + +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp + +; Partial unroll 16 times for this loop. +define void @unroll2() nounwind { +entry: + br label %loop1 + +loop1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + br label %loop2.header + +loop2.header: + br label %loop2 + +loop2: + %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] + %inc2 = add i32 %iv2, 1 + %exitcnd2 = icmp uge i32 %inc2, 1024 + br i1 %exitcnd2, label %exit2, label %loop2 + +exit2: + br label %loop1.latch + +loop1.latch: + %inc1 = add i32 %iv1, 1 + %exitcnd1 = icmp uge i32 %inc1, 1024 + br i1 %exitcnd2, label %exit, label %loop1 + +exit: + ret void +} + + + +; CHECK: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: icmp diff --git a/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll new file mode 100644 index 0000000..d3dc081 --- /dev/null +++ b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s + +; Tests for unrolling loops with run-time trip counts + +; CHECK: %xtraiter = and i32 %n +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split + +; CHECK: for.body.prol: +; CHECK: for.body: + +define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} + + diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll index aae79cb..e9aa1ac 100644 --- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll +++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll @@ -27,8 +27,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll index a2b04c7..4566f79 100644 --- a/test/Transforms/LoopUnroll/X86/partial.ll +++ b/test/Transforms/LoopUnroll/X86/partial.ll @@ -9,20 +9,20 @@ entry: vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] - %0 = getelementptr inbounds double* %b, i64 %index + %0 = getelementptr inbounds double, double* %b, i64 %index %1 = bitcast double* %0 to <2 x double>* - %wide.load = load <2 x double>* %1, align 8 + %wide.load = load <2 x double>, <2 x double>* %1, align 8 %.sum9 = or i64 %index, 2 - %2 = getelementptr double* %b, i64 %.sum9 + %2 = getelementptr double, double* %b, i64 %.sum9 %3 = bitcast double* %2 to <2 x double>* - %wide.load8 = load <2 x double>* %3, align 8 + %wide.load8 = load <2 x double>, <2 x double>* %3, align 8 %4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00> %5 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00> - %6 = getelementptr inbounds double* %a, i64 %index + %6 = getelementptr inbounds double, double* %a, i64 %index %7 = bitcast double* %6 to <2 x double>* store <2 x double> %4, <2 x double>* %7, align 8 %.sum10 = or i64 %index, 2 - %8 = getelementptr double* %a, i64 %.sum10 + %8 = getelementptr double, double* %a, i64 %.sum10 %9 = bitcast double* %8 to <2 x double>* store <2 x double> %5, <2 x double>* %9, align 8 %index.next = add i64 %index, 4 @@ -45,12 +45,12 @@ entry: vector.body: ; preds = %vector.body, %entry %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] - %v0 = getelementptr inbounds double* %b, i64 %index + %v0 = getelementptr inbounds double, double* %b, i64 %index %v1 = bitcast double* %v0 to <2 x double>* - %wide.load = load <2 x double>* %v1, align 8 + %wide.load = load <2 x double>, <2 x double>* %v1, align 8 %v4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00> %v5 = fmul <2 x double> %v4, <double 8.000000e+00, double 8.000000e+00> - %v6 = getelementptr inbounds double* %a, i64 %index + %v6 = getelementptr inbounds double, double* %a, i64 %index %v7 = bitcast double* %v6 to <2 x double>* store <2 x double> %v5, <2 x double>* %v7, align 8 %index.next = add i64 %index, 2 @@ -84,18 +84,18 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i16* %arr, i64 %indvars.iv - %0 = load i16* %arrayidx, align 2 + %arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 2 %add = add i16 %0, %reduction.026 %sext = mul i64 %indvars.iv, 12884901888 %idxprom3 = ashr exact i64 %sext, 32 - %arrayidx4 = getelementptr inbounds i16* %arr, i64 %idxprom3 - %1 = load i16* %arrayidx4, align 2 + %arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3 + %1 = load i16, i16* %arrayidx4, align 2 %add7 = add i16 %add, %1 %sext28 = mul i64 %indvars.iv, 21474836480 %idxprom10 = ashr exact i64 %sext28, 32 - %arrayidx11 = getelementptr inbounds i16* %arr, i64 %idxprom10 - %2 = load i16* %arrayidx11, align 2 + %arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10 + %2 = load i16, i16* %arrayidx11, align 2 %add14 = add i16 %add7, %2 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll index 9d40613..d16eba7 100644 --- a/test/Transforms/LoopUnroll/ephemeral.ll +++ b/test/Transforms/LoopUnroll/ephemeral.ll @@ -12,8 +12,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 ; This loop will be completely unrolled, even with these extra instructions, ; but only because they're ephemeral (and, thus, free). diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll index a1bb4c5..a9104ad 100644 --- a/test/Transforms/LoopUnroll/full-unroll-heuristics.ll +++ b/test/Transforms/LoopUnroll/full-unroll-heuristics.ll @@ -24,14 +24,14 @@ ; If the absolute threshold is too low, or if we can't optimize away requested ; percent of instructions, we shouldn't unroll: -; TEST1: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv -; TEST3: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv +; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv +; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; Otherwise, we should: -; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv +; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; Also, we should unroll if the 'unroll-threshold' is big enough: -; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv +; TEST4-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv ; And check that we don't crash when we're not allowed to do any analysis. ; RUN: opt < %s -loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output @@ -46,10 +46,10 @@ entry: loop: ; preds = %loop, %entry %iv = phi i64 [ 0, %entry ], [ %inc, %loop ] %r = phi i32 [ 0, %entry ], [ %add, %loop ] - %arrayidx = getelementptr inbounds i32* %src, i64 %iv - %src_element = load i32* %arrayidx, align 4 - %array_const_idx = getelementptr inbounds [9 x i32]* @known_constant, i64 0, i64 %iv - %const_array_element = load i32* %array_const_idx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv + %src_element = load i32, i32* %arrayidx, align 4 + %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv + %const_array_element = load i32, i32* %array_const_idx, align 4 %mul = mul nsw i32 %src_element, %const_array_element %add = add nsw i32 %mul, %r %inc = add nuw nsw i64 %iv, 1 diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll index dcb5d1c..2101b63 100644 --- a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll +++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll @@ -18,7 +18,7 @@ for.body: ; preds = %entry, %for.body %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; The real loop. %mul = mul nsw i32 %B, %C - %arrayidx = getelementptr inbounds i32* %A, i32 %i.01 + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.01 store i32 %mul, i32* %arrayidx, align 4 %inc = add nsw i32 %i.01, 1 %exitcond = icmp ne i32 %inc, 4 diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll index 80571ec..9e78edf 100644 --- a/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/test/Transforms/LoopUnroll/runtime-loop.ll @@ -20,8 +20,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -47,8 +47,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %sum.01 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 @@ -96,8 +96,8 @@ for.body: ; preds = %for.body, %entry %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ] %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ] %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ] - %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i64 1 - %0 = load i16* %p.addr.05, align 2 + %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1 + %0 = load i16, i16* %p.addr.05, align 2 %conv = zext i16 %0 to i32 %add = add i32 %conv, %res.03 %sub = add nsw i32 %len.addr.04, -2 @@ -113,6 +113,39 @@ for.end: ; preds = %for.cond.for.end_cr ret i16 %res.0.lcssa } +; Test run-time unrolling disable metadata. +; CHECK: for.body: +; CHECK-NOT: for.body.prol: + +define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly { +entry: + %cmp2 = icmp eq i32 %len, 0 + br i1 %cmp2, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ] + %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ] + %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1 + %0 = load i16, i16* %p.addr.05, align 2 + %conv = zext i16 %0 to i32 + %add = add i32 %conv, %res.03 + %sub = add nsw i32 %len.addr.04, -2 + %cmp = icmp eq i32 %sub, 0 + br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body, !llvm.loop !0 + +for.cond.for.end_crit_edge: ; preds = %for.body + %phitmp = trunc i32 %add to i16 + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i16 %res.0.lcssa +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.runtime.disable"} + ; CHECK: !0 = distinct !{!0, !1} ; CHECK: !1 = !{!"llvm.loop.unroll.disable"} diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index 5ff75e3..7684e39 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -16,8 +16,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll index 176362a..7c6bb96 100644 --- a/test/Transforms/LoopUnroll/runtime-loop2.ll +++ b/test/Transforms/LoopUnroll/runtime-loop2.ll @@ -16,8 +16,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll index aa928cc..fd13ebf 100644 --- a/test/Transforms/LoopUnroll/runtime-loop3.ll +++ b/test/Transforms/LoopUnroll/runtime-loop3.ll @@ -23,8 +23,8 @@ for.body3: ; preds = %for.cond1.preheader %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.cond1.preheader ] %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ] %0 = add nsw i64 %indvars.iv, %indvars.iv16 - %arrayidx = getelementptr inbounds i32* %a, i64 %0 - %1 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %0 + %1 = load i32, i32* %arrayidx, align 4 %add4 = add nsw i32 %1, %sum.19 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 diff --git a/test/Transforms/LoopUnroll/runtime-loop4.ll b/test/Transforms/LoopUnroll/runtime-loop4.ll new file mode 100644 index 0000000..9be0ffd --- /dev/null +++ b/test/Transforms/LoopUnroll/runtime-loop4.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s + +; Check runtime unrolling prologue can be promoted by LICM pass. + +; CHECK: entry: +; CHECK: %xtraiter +; CHECK: %lcmp.mod +; CHECK: loop1: +; CHECK: br i1 %lcmp.mod +; CHECK: loop2.prol: + +define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind { +entry: + br label %loop1 + +loop1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + %offset1 = getelementptr i32, i32* %addr1, i32 %iv1 + store i32 %iv1, i32* %offset1, align 4 + br label %loop2.header + +loop2.header: + br label %loop2 + +loop2: + %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] + %offset2 = getelementptr i32, i32* %addr2, i32 %iv2 + store i32 %iv2, i32* %offset2, align 4 + %inc2 = add i32 %iv2, 1 + %exitcnd2 = icmp uge i32 %inc2, %iter + br i1 %exitcnd2, label %exit2, label %loop2 + +exit2: + br label %loop1.latch + +loop1.latch: + %inc1 = add i32 %iv1, 1 + %exitcnd1 = icmp uge i32 %inc1, 1024 + br i1 %exitcnd1, label %exit, label %loop1 + +exit: + ret void +} diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll index 20161d7..a5c9a6e 100644 --- a/test/Transforms/LoopUnroll/scevunroll.ll +++ b/test/Transforms/LoopUnroll/scevunroll.ll @@ -19,8 +19,8 @@ while.body: %iv = phi i64 [ 10, %entry ], [ %iv.next, %while.body ] %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ] %iv.next = add i64 %iv, -1 - %adr = getelementptr inbounds i32* %base, i64 %iv.next - %tmp = load i32* %adr, align 8 + %adr = getelementptr inbounds i32, i32* %base, i64 %iv.next + %tmp = load i32, i32* %adr, align 8 %sum.next = add i32 %sum, %tmp %iv.narrow = trunc i64 %iv.next to i32 %cmp.i65 = icmp sgt i32 %iv.narrow, 0 @@ -46,8 +46,8 @@ entry: loop: %iv = phi i64 [ 0, %entry ], [ %inc, %tail ] %s = phi i64 [ 0, %entry ], [ %s.next, %tail ] - %adr = getelementptr i64* %base, i64 %iv - %val = load i64* %adr + %adr = getelementptr i64, i64* %base, i64 %iv + %val = load i64, i64* %adr %s.next = add i64 %s, %val %inc = add i64 %iv, 1 %cmp = icmp ne i64 %inc, 4 @@ -67,8 +67,8 @@ exit2: ; SCEV properly unrolls multi-exit loops. ; ; CHECK-LABEL: @multiExit( -; CHECK: getelementptr i32* %base, i32 10 -; CHECK-NEXT: load i32* +; CHECK: getelementptr i32, i32* %base, i32 10 +; CHECK-NEXT: load i32, i32* ; CHECK: br i1 false, label %l2.10, label %exit1 ; CHECK: l2.10: ; CHECK-NOT: br @@ -81,8 +81,8 @@ l1: %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l2 ] %inc1 = add i32 %iv1, 1 %inc2 = add i32 %iv2, 1 - %adr = getelementptr i32* %base, i32 %iv1 - %val = load i32* %adr + %adr = getelementptr i32, i32* %base, i32 %iv1 + %val = load i32, i32* %adr %cmp1 = icmp slt i32 %iv1, 5 br i1 %cmp1, label %l2, label %exit1 l2: @@ -112,8 +112,8 @@ l1: %iv2 = phi i32 [ 0, %entry ], [ %inc2, %l3 ] %inc1 = add i32 %iv1, 1 %inc2 = add i32 %iv2, 1 - %adr = getelementptr i32* %base, i32 %iv1 - %val = load i32* %adr + %adr = getelementptr i32, i32* %base, i32 %iv1 + %val = load i32, i32* %adr %cmp1 = icmp slt i32 %iv1, 5 br i1 %cmp1, label %l2, label %exit1 l2: diff --git a/test/Transforms/LoopUnroll/shifted-tripcount.ll b/test/Transforms/LoopUnroll/shifted-tripcount.ll index a118a46..4c21698 100644 --- a/test/Transforms/LoopUnroll/shifted-tripcount.ll +++ b/test/Transforms/LoopUnroll/shifted-tripcount.ll @@ -13,11 +13,11 @@ entry: for.body: ; preds = %entry, %for.body %i.013 = phi i64 [ %tmp16, %for.body ], [ 0, %entry ] ; <i64> [#uses=2] - %arrayidx7 = getelementptr double* %p, i64 %i.013 ; <double*> [#uses=2] + %arrayidx7 = getelementptr double, double* %p, i64 %i.013 ; <double*> [#uses=2] %tmp16 = add i64 %i.013, 1 ; <i64> [#uses=3] - %arrayidx = getelementptr double* %p, i64 %tmp16 ; <double*> [#uses=1] - %tmp4 = load double* %arrayidx ; <double> [#uses=1] - %tmp8 = load double* %arrayidx7 ; <double> [#uses=1] + %arrayidx = getelementptr double, double* %p, i64 %tmp16 ; <double*> [#uses=1] + %tmp4 = load double, double* %arrayidx ; <double> [#uses=1] + %tmp8 = load double, double* %arrayidx7 ; <double> [#uses=1] %mul9 = fmul double %tmp8, %tmp4 ; <double> [#uses=1] store double %mul9, double* %arrayidx7 %exitcond = icmp eq i64 %tmp16, %mul10 ; <i1> [#uses=1] diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll index 4f934a6..dc812fb 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll @@ -19,8 +19,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -51,8 +51,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -78,8 +78,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -110,8 +110,8 @@ entry: for.body3: ; preds = %for.body3, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ] - %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %List, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %add4 = add nsw i32 %0, 10 store i32 %add4, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -124,8 +124,8 @@ for.body3.1.preheader: ; preds = %for.body3 for.body3.1: ; preds = %for.body3.1.preheader, %for.body3.1 %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ] %1 = add nsw i64 %indvars.iv.1, 1 - %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1 - %2 = load i32* %arrayidx.1, align 4 + %arrayidx.1 = getelementptr inbounds i32, i32* %List, i64 %1 + %2 = load i32, i32* %arrayidx.1, align 4 %add4.1 = add nsw i32 %2, 10 store i32 %add4.1, i32* %arrayidx.1, align 4 %exitcond.1 = icmp eq i64 %1, 4 diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll index 5831557..1354181 100644 --- a/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -19,8 +19,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -43,8 +43,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -71,8 +71,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -94,8 +94,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -124,8 +124,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -153,8 +153,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -190,8 +190,8 @@ entry: for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -217,8 +217,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -245,8 +245,8 @@ entry: for.body: ; preds = %for.body, %entry %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv - %0 = load i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 |