diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2011-11-13 11:20:44 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2011-11-13 11:20:44 +0000 |
commit | df234353fb396e84e7a3a1cdd94f73681e65bd88 (patch) | |
tree | d685cc000869f6d97d01f92ffe3a131cb299e5ef /test | |
parent | 9eb674880b98cbeca0cd5b3f0265b77282d48b4a (diff) | |
download | external_llvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.zip external_llvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.tar.gz external_llvm-df234353fb396e84e7a3a1cdd94f73681e65bd88.tar.bz2 |
Rewrite #3 of machine block placement. This is based somewhat on the
second algorithm, but only loosely. It is more heavily based on the last
discussion I had with Andy. It continues to walk from the inner-most
loop outward, but there is a key difference. With this algorithm we
ensure that as we visit each loop, the entire loop is merged into
a single chain. At the end, the entire function is treated as a "loop",
and merged into a single chain. This chain forms the desired sequence of
blocks within the function. Switching to a single algorithm removes my
biggest problem with the previous approaches -- they had different
behavior depending on which system triggered the layout. Now there is
exactly one algorithm and one basis for the decision making.
The other key difference is how the chain is formed. This is based
heavily on the idea Andy mentioned of keeping a worklist of blocks that
are viable layout successors based on the CFG. Having this set allows us
to consistently select the best layout successor for each block. Keeping
this set is expensive, though.
The code here remains very rough. There is a lot that needs to be done
to clean up the code and to make the runtime cost of this pass much
lower. This is very much a WIP, but it was a giant chunk of code and I'd
rather folks see it sooner rather than later. Everything remains behind a
flag, of course.
I've added a couple of tests to exercise the issues that this iteration
was motivated by: loop structure preservation. I've also fixed one test
that was exhibiting the broken behavior of the previous version.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144495 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rw-r--r-- | test/CodeGen/X86/block-placement.ll | 97 |
1 file changed, 96 insertions, 1 deletion
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index 38d3062..4f0b671 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -72,8 +72,103 @@ exit: ret i32 %b } +define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { +; Check that we sink cold loop blocks after the hot loop body. +; CHECK: test_loop_cold_blocks: +; CHECK: %entry +; CHECK: %body1 +; CHECK: %body2 +; CHECK: %body3 +; CHECK: %unlikely1 +; CHECK: %unlikely2 +; CHECK: %exit + +entry: + br label %body1 + +body1: + %iv = phi i32 [ 0, %entry ], [ %next, %body3 ] + %base = phi i32 [ 0, %entry ], [ %sum, %body3 ] + %unlikelycond1 = icmp slt i32 %base, 42 + br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0 + +unlikely1: + call void @error(i32 %i, i32 1, i32 %base) + br label %body2 + +body2: + %unlikelycond2 = icmp sgt i32 %base, 21 + br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0 + +unlikely2: + call void @error(i32 %i, i32 2, i32 %base) + br label %body3 + +body3: + %arrayidx = getelementptr inbounds i32* %a, i32 %iv + %0 = load i32* %arrayidx + %sum = add nsw i32 %0, %base + %next = add i32 %iv, 1 + %exitcond = icmp eq i32 %next, %i + br i1 %exitcond, label %exit, label %body1 + +exit: + ret i32 %sum +} + !0 = metadata !{metadata !"branch_weights", i32 4, i32 64} +define i32 @test_loop_early_exits(i32 %i, i32* %a) { +; Check that we sink early exit blocks out of loop bodies. 
+; CHECK: test_loop_early_exits: +; CHECK: %entry +; CHECK: %body1 +; CHECK: %body2 +; CHECK: %body3 +; CHECK: %body4 +; CHECK: %exit +; CHECK: %bail1 +; CHECK: %bail2 +; CHECK: %bail3 + +entry: + br label %body1 + +body1: + %iv = phi i32 [ 0, %entry ], [ %next, %body4 ] + %base = phi i32 [ 0, %entry ], [ %sum, %body4 ] + %bailcond1 = icmp eq i32 %base, 42 + br i1 %bailcond1, label %bail1, label %body2 + +bail1: + ret i32 -1 + +body2: + %bailcond2 = icmp eq i32 %base, 43 + br i1 %bailcond2, label %bail2, label %body3 + +bail2: + ret i32 -2 + +body3: + %bailcond3 = icmp eq i32 %base, 44 + br i1 %bailcond3, label %bail3, label %body4 + +bail3: + ret i32 -3 + +body4: + %arrayidx = getelementptr inbounds i32* %a, i32 %iv + %0 = load i32* %arrayidx + %sum = add nsw i32 %0, %base + %next = add i32 %iv, 1 + %exitcond = icmp eq i32 %next, %i + br i1 %exitcond, label %exit, label %body1 + +exit: + ret i32 %sum +} + define i32 @test_loop_align(i32 %i, i32* %a) { ; Check that we provide basic loop body alignment with the block placement ; pass. @@ -105,7 +200,7 @@ define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) { ; CHECK: test_nested_loop_align: ; CHECK: %entry ; CHECK: .align [[ALIGN]], -; CHECK-NEXT: %loop.body.2 +; CHECK-NEXT: %loop.body.1 ; CHECK: .align [[ALIGN]], ; CHECK-NEXT: %inner.loop.body ; CHECK-NOT: .align |