Use a bigger hammer to fix PR11314 by disabling the "forcing two-address instruction lower optimization" in the pre-RA scheduler.

The optimization, or rather the hack, was done before the MI use-list was available. Now we should be able to implement it in a better way, perhaps in the two-address pass until an MI scheduler is available. Now that the scheduler has to backtrack to handle call sequences, adding artificial scheduling constraints is just not safe. Furthermore, the hack does not take all the other scheduling decisions into consideration, so it's just as likely to pessimize code. So I view disabling this optimization as goodness regardless of PR11314. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144267 91177308-0d34-0410-b5e6-96231b3b80d8
author: Evan Cheng <evan.cheng@apple.com> 2011-11-10 07:43:16 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2011-11-10 07:43:16 +0000
commit: 623a7e146bd86747dc46a6f8bb9993fc217d6b78 (patch)
tree: 72b44ba84e3cb347279c411ac8b159e6fecaf9b0
parent: 4dbe96e22ff4989577cf4854f717b9d972094f87 (diff)
download: external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.zip
external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.tar.gz
external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.tar.bz2
12 files changed, 36 insertions, 21 deletions
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index cab303d..7938a37 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -89,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
 static cl::opt<bool> DisableSchedHeight(
   "disable-sched-height", cl::Hidden, cl::init(false),
   cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+  "disable-2addr-hack", cl::Hidden, cl::init(true),
+  cl::desc("Disable scheduler's two-address hack"));
 
 static cl::opt<int> MaxReorderWindow(
   "max-sched-reorder", cl::Hidden, cl::init(6),
@@ -2628,7 +2631,8 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
   SUnits = &sunits;
   // Add pseudo dependency edges for two-address nodes.
-  AddPseudoTwoAddrDeps();
+  if (!Disable2AddrHack)
+    AddPseudoTwoAddrDeps();
   // Reroute edges to nodes with multiple uses.
   if (!TracksRegPressure)
     PrescheduleNodesWithMultipleUses();
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index a871ea1..8bb9b92 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN:     grep {asm-printer} | grep 34
+; RUN:     grep {asm-printer} | grep 35
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 620e0f3..e904b1c 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -5,7 +5,6 @@
 
 ; CHECK: pextrw $14
 ; CHECK-NEXT: shrl $8
-; CHECK-NEXT: (%ebp)
 ; CHECK-NEXT: pinsrw
 
 define void @update(i8** %args_list) nounwind {
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 8b53ae2..1c5c113 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -3,6 +3,10 @@
 ; Nested LSR is required to optimize this case.
 ; We do not expect to see this form of IR without -enable-iv-rewrite.
 
+; xfailed for now because the scheduler two-address hack has been disabled.
+; Now it's generating a leal -1 rather than a decq.
+; XFAIL: *
+
 define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
 ; CHECK: borf:
 ; CHECK-NOT: inc
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 647bbdb..1d315ff 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
+; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+
+; i386 test has been disabled when scheduler 2-addr hack is disabled.
 
 ; This testcase shouldn't need to spill the -1 value,
 ; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 8f79fb8..4a6f531 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -1,9 +1,8 @@
 ; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
 ; RUN: not grep inc %t
 ; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
+; RUN: grep addq %t | count 10
 ; RUN: not grep addb %t
-; RUN: not grep leaq %t
 ; RUN: not grep leal %t
 ; RUN: not grep movq %t
 
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 938023f..382b0e0 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=x86-64 < %s | FileCheck %s
 
 ; CHECK: decq
+; CHECK-NEXT: movl (
 ; CHECK-NEXT: jne
 
 @Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 1f87089..5f5e093 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -4,13 +4,14 @@
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
+; FIXME: This is worse off from disabling of scheduler 2-address hack.
 ; CHECK: movaps        (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
+; CHECK: leaq  4(%rax), %{{rcx|r9}}
 ; CHECK: cvtdq2ps
 ; CHECK: orps          {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
 ; CHECK: movaps        [[X4]], (%{{rdi|rcx}},%rax,4)
-; CHECK: addq  $4, %rax
-; CHECK: cmpl  %eax, (%{{rdx|r8}})
-; CHECK-NEXT: jg
+; CHECK: cmpl  %{{ecx|r9d}}, (%{{rdx|r8}})
+; CHECK: jg
 
 define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
 entry:
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0b4d73a..3a4acb8 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -3,10 +3,10 @@
 ; RUN: not grep movz %t
 ; RUN: not grep sar %t
 ; RUN: not grep shl %t
-; RUN: grep add %t | count 2
+; RUN: grep add %t | count 1
 ; RUN: grep inc %t | count 4
 ; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
 
 ; Optimize away zext-inreg and sext-inreg on the loop induction
 ; variable using trip-count information.
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 51a0611..4f7e28a 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,6 +1,10 @@
 ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
 ; rdar://7236213
 
+; XFAILed: now that the scheduler 2-address hack is disabled, a lea is generated.
+; The code isn't any worse though.
+; XFAIL: *
+
 ; CodeGen shouldn't require any lea instructions inside the marked loop.
 ; It should properly set up post-increment uses and do coalescing for
 ; the induction variables.
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 1d74af2..36a0fd9 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
         %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
         ret <4 x float> %tmp27
 ; CHECK: test14:
-; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: 	subps	[[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK: 	addps	[[X1]], [[X0:%xmm[0-9]+]]
 ; CHECK: 	movlhps	[[X2]], [[X0]]
 }
 
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index d05c453..291069d 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -226,15 +226,16 @@ entry:
 }
 
 
-
+; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
 define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
 entry:
         %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
         ret <8 x i16> %tmp8
 ; X64: 	t15:
-; X64: 		pextrw	$7, %xmm0, %eax
+; X64:          movdqa %xmm0, %xmm2
 ; X64: 		punpcklqdq	%xmm1, %xmm0
 ; X64: 		pshuflw	$-128, %xmm0, %xmm0
+; X64: 		pextrw	$7, %xmm2, %eax
 ; X64: 		pinsrw	$2, %eax, %xmm0
 ; X64: 		ret
 }
@@ -247,12 +248,12 @@ entry:
         %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
         ret <16 x i8> %tmp9
 ; X64: 	t16:
-; X64: 		movdqa	%xmm1, %xmm0
-; X64: 		pslldq	$2, %xmm0
-; X64: 		pextrw	$1, %xmm0, %eax
-; X64: 		movd	%xmm0, %ecx
-; X64: 		pinsrw	$0, %ecx, %xmm0
-; X64: 		pextrw	$8, %xmm1, %ecx
+; X64: 		movdqa	%xmm1, %xmm2
+; X64: 		pslldq	$2, %xmm2
+; X64: 		movd	%xmm2, %eax
+; X64: 		pinsrw	$0, %eax, %xmm0
+; X64: 		pextrw	$8, %xmm1, %eax
+; X64: 		pextrw	$1, %xmm2, %ecx
 ; X64: 		ret
 }
author	Evan Cheng <evan.cheng@apple.com>	2011-11-10 07:43:16 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2011-11-10 07:43:16 +0000
commit	623a7e146bd86747dc46a6f8bb9993fc217d6b78 (patch)
tree	72b44ba84e3cb347279c411ac8b159e6fecaf9b0
parent	4dbe96e22ff4989577cf4854f717b9d972094f87 (diff)
download	external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.zip external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.tar.gz external_llvm-623a7e146bd86747dc46a6f8bb9993fc217d6b78.tar.bz2