 lib/CodeGen/TwoAddressInstructionPass.cpp | 84 ++++++++++++++++++++++++++++++
 test/CodeGen/X86/2007-01-08-InstrSched.ll |  8 ++--
 test/CodeGen/X86/lsr-reuse.ll             | 16 +++---
 test/CodeGen/X86/pic.ll                   |  2 +-
 test/CodeGen/X86/stack-align.ll           |  9 ++--
 test/CodeGen/X86/tailcallstack64.ll       |  4 ++-
 6 files changed, 105 insertions(+), 18 deletions(-)
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 14102d1..6144ecd 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -898,6 +898,90 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
       }
     }
   }
 
+
+  // If this is an instruction with a load folded into it, try unfolding
+  // the load, e.g. avoid this:
+  //   movq %rdx, %rcx
+  //   addq (%rax), %rcx
+  // in favor of this:
+  //   movq (%rax), %rcx
+  //   addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+  if (TID.mayLoad() && !regBKilled) {
+    // Determine if a load can be unfolded.
+    unsigned LoadRegIndex;
+    unsigned NewOpc =
+      TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+                                      /*UnfoldLoad=*/true,
+                                      /*UnfoldStore=*/false,
+                                      &LoadRegIndex);
+    if (NewOpc != 0) {
+      const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+      if (UnfoldTID.getNumDefs() == 1) {
+        MachineFunction &MF = *mbbi->getParent();
+
+        // Unfold the load.
+        DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+        const TargetRegisterClass *RC =
+          UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+        unsigned Reg = MRI->createVirtualRegister(RC);
+        SmallVector<MachineInstr *, 2> NewMIs;
+        bool Success =
+          TII->unfoldMemoryOperand(MF, mi, Reg,
+                                   /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                                   NewMIs);
+        (void)Success;
+        assert(Success &&
+               "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+               "succeeded!");
+        assert(NewMIs.size() == 2 &&
+               "Unfolded a load into multiple instructions!");
+        // The load was previously folded, so this is the only use.
+        NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+        // Tentatively insert the instructions into the block so that they
+        // look "normal" to the transformation logic.
+        mbbi->insert(mi, NewMIs[0]);
+        mbbi->insert(mi, NewMIs[1]);
+
+        DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+                     << "2addr: NEW INST: " << *NewMIs[1]);
+
+        // Transform the instruction, now that it no longer has a load.
+        unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+        unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+        MachineBasicBlock::iterator NewMI = NewMIs[1];
+        bool TransformSuccess =
+          TryInstructionTransform(NewMI, mi, mbbi,
+                                  NewSrcIdx, NewDstIdx, Dist);
+        if (TransformSuccess ||
+            NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+          // Success, or at least we made an improvement. Keep the unfolded
+          // instructions and discard the original.
+          if (LV) {
+            for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+              MachineOperand &MO = mi->getOperand(i);
+              if (MO.isReg() && MO.isUse() && MO.isKill())
+                LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+            }
+            LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+          }
+          mi->eraseFromParent();
+          mi = NewMIs[1];
+          if (TransformSuccess)
+            return true;
+        } else {
+          // Transforming didn't eliminate the tie and didn't lead to an
+          // improvement. Clean up the unfolded instructions and keep the
+          // original.
+          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+          NewMIs[0]->eraseFromParent();
+          NewMIs[1]->eraseFromParent();
+        }
+      }
+    }
+  }
+
   return false;
 }
 
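To make the rewrite above concrete outside the pass, here is a minimal standalone sketch; the Inst struct and print helper are invented stand-ins for illustration, not LLVM's MachineInstr/TargetInstrInfo API. It prints the folded sequence from the comment in the hunk next to its unfolded replacement:

```cpp
// Toy model of the unfold rewrite; nothing here is LLVM API.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode;
  std::vector<std::string> Ops; // AT&T order: sources first, destination last.
};

static void print(const char *Label, const std::vector<Inst> &Seq) {
  std::cout << Label << ":\n";
  for (const Inst &I : Seq) {
    std::cout << "  " << I.Opcode;
    for (std::size_t i = 0; i != I.Ops.size(); ++i)
      std::cout << (i ? ", " : " ") << I.Ops[i];
    std::cout << '\n';
  }
}

int main() {
  // Folded form: a copy feeds the tied destination, then the add carries
  // the folded load.
  std::vector<Inst> Folded = {
      {"movq", {"%rdx", "%rcx"}},   // %rcx = %rdx (copy for the tied operand)
      {"addq", {"(%rax)", "%rcx"}}, // %rcx += mem(%rax), load folded in
  };
  // Unfolded form: the load defines the destination directly, the add goes
  // register-register, and since add commutes the copy is no longer needed.
  std::vector<Inst> Unfolded = {
      {"movq", {"(%rax)", "%rcx"}}, // %rcx = mem(%rax)
      {"addq", {"%rdx", "%rcx"}},   // %rcx += %rdx
  };
  print("before", Folded);
  print("after", Unfolded);
  return 0;
}
```

The payoff is visible in the output: the register copy feeding the tied operand disappears, leaving a plain load that the scheduler can move independently of the computation of %rdx.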
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index ef19d72..6f8b89c 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
 %tmp14 = fadd float %tmp12, %tmp7
 ret float %tmp14
-; CHECK: mulss LCPI0_0(%rip)
-; CHECK: mulss LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_2(%rip)
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_3(%rip)
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 3f4f9ec..b7e69b8 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -465,14 +465,14 @@ bb5: ; preds = %bb3, %entry
 ; And the one at %bb68, where we want to be sure to use superhero mode:
 ; CHECK: BB10_10:
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
 ; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index 9506c9b..d7b0805 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -189,7 +189,7 @@ bb12:
 ; LINUX: call .L7$pb
 ; LINUX: .L7$pb:
 ; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX: addl .LJTI7_0@GOTOFF(
+; LINUX: .LJTI7_0@GOTOFF(
 ; LINUX: jmpl *
 ; LINUX: .LJTI7_0:
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 271ad1a..8ca0b12 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -9,14 +9,15 @@ target triple = "i686-apple-darwin8"
 define void @test({ double, double }* byval %z, double* %P) {
 entry:
+  %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
+  %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
+  volatile store double %tmp4, double* %P
   %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
-  %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1]
+  %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1]
   %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
 ; CHECK: andpd{{.*}}4(%esp), %xmm
-  %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
-  %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
   %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
-  store double %tmp6, double* %P, align 8
+  volatile store double %tmp6, double* %P, align 8
   ret void
 }
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index d05dff8..107bdf9 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,9 +2,11 @@
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl %edi, %eax
+; CHECK: movl 32(%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
 ; CHECK: movl 40(%rsp), %r10d
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: addl %edi, %eax
 ; Move param %in2 to stack.
 ; CHECK: movl %r10d, 32(%rsp)
 ; Move result of addition to stack.