 lib/CodeGen/TwoAddressInstructionPass.cpp | 84 ++++++++++++++++++++++++++++++
 test/CodeGen/X86/2007-01-08-InstrSched.ll |  8 ++--
 test/CodeGen/X86/lsr-reuse.ll             | 16 +++---
 test/CodeGen/X86/pic.ll                   |  2 +-
 test/CodeGen/X86/stack-align.ll           |  9 ++--
 test/CodeGen/X86/tailcallstack64.ll       |  4 ++-
 6 files changed, 105 insertions(+), 18 deletions(-)
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 14102d1..6144ecd 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -898,6 +898,90 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
       }
     }
   }
 
+
+  // If this is an instruction with a load folded into it, try unfolding
+  // the load, e.g. avoid this:
+  //   movq %rdx, %rcx
+  //   addq (%rax), %rcx
+  // in favor of this:
+  //   movq (%rax), %rcx
+  //   addq %rdx, %rcx
+  // because it's preferable to schedule a load rather than a register copy.
+  if (TID.mayLoad() && !regBKilled) {
+    // Determine if a load can be unfolded.
+    unsigned LoadRegIndex;
+    unsigned NewOpc =
+      TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+                                      /*UnfoldLoad=*/true,
+                                      /*UnfoldStore=*/false,
+                                      &LoadRegIndex);
+    if (NewOpc != 0) {
+      const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+      if (UnfoldTID.getNumDefs() == 1) {
+        MachineFunction &MF = *mbbi->getParent();
+
+        // Unfold the load.
+        DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+        const TargetRegisterClass *RC =
+          UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+        unsigned Reg = MRI->createVirtualRegister(RC);
+        SmallVector<MachineInstr *, 2> NewMIs;
+        bool Success =
+          TII->unfoldMemoryOperand(MF, mi, Reg,
+                                   /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                                   NewMIs);
+        (void)Success;
+        assert(Success &&
+               "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+               "succeeded!");
+        assert(NewMIs.size() == 2 &&
+               "Unfolded a load into multiple instructions!");
+        // The load was previously folded, so this is the only use.
+        NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+        // Tentatively insert the instructions into the block so that they
+        // look "normal" to the transformation logic.
+        mbbi->insert(mi, NewMIs[0]);
+        mbbi->insert(mi, NewMIs[1]);
+
+        DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+                     << "2addr: NEW INST: " << *NewMIs[1]);
+
+        // Transform the instruction, now that it no longer has a load.
+        unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+        unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+        MachineBasicBlock::iterator NewMI = NewMIs[1];
+        bool TransformSuccess =
+          TryInstructionTransform(NewMI, mi, mbbi,
+                                  NewSrcIdx, NewDstIdx, Dist);
+        if (TransformSuccess ||
+            NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+          // Success, or at least we made an improvement. Keep the unfolded
+          // instructions and discard the original.
+          if (LV) {
+            for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+              MachineOperand &MO = mi->getOperand(i);
+              if (MO.isReg() && MO.isUse() && MO.isKill())
+                LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+            }
+            LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+          }
+          mi->eraseFromParent();
+          mi = NewMIs[1];
+          if (TransformSuccess)
+            return true;
+        } else {
+          // Transforming didn't eliminate the tie and didn't lead to an
+          // improvement. Clean up the unfolded instructions and keep the
+          // original.
+          DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+          NewMIs[0]->eraseFromParent();
+          NewMIs[1]->eraseFromParent();
+        }
+      }
+    }
+  }
+
   return false;
 }
 
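To make the rewrite above concrete outside the pass, here is a minimal standalone sketch; the Inst struct and print helper are invented stand-ins for illustration, not LLVM's MachineInstr/TargetInstrInfo API. It prints the folded sequence from the comment in the hunk next to its unfolded replacement:

```cpp
// Toy model of the unfold rewrite; nothing here is LLVM API.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode;
  std::vector<std::string> Ops; // AT&T order: sources first, destination last.
};

static void print(const char *Label, const std::vector<Inst> &Seq) {
  std::cout << Label << ":\n";
  for (const Inst &I : Seq) {
    std::cout << "  " << I.Opcode;
    for (std::size_t i = 0; i != I.Ops.size(); ++i)
      std::cout << (i ? ", " : " ") << I.Ops[i];
    std::cout << '\n';
  }
}

int main() {
  // Folded form: a copy feeds the tied destination, then the add carries
  // the folded load.
  std::vector<Inst> Folded = {
      {"movq", {"%rdx", "%rcx"}},   // %rcx = %rdx (copy for the tied operand)
      {"addq", {"(%rax)", "%rcx"}}, // %rcx += mem(%rax), load folded in
  };
  // Unfolded form: the load defines the destination directly, the add goes
  // register-register, and since add commutes the copy is no longer needed.
  std::vector<Inst> Unfolded = {
      {"movq", {"(%rax)", "%rcx"}}, // %rcx = mem(%rax)
      {"addq", {"%rdx", "%rcx"}},   // %rcx += %rdx
  };
  print("before", Folded);
  print("after", Unfolded);
  return 0;
}
```

The payoff is visible in the output: the register copy feeding the tied operand disappears, leaving a plain load that the scheduler can move independently of the computation of %rdx.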
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index ef19d72..6f8b89c 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -11,12 +11,12 @@ define float @foo(float %x) nounwind {
 %tmp14 = fadd float %tmp12, %tmp7
 ret float %tmp14
-; CHECK: mulss LCPI0_0(%rip)
-; CHECK: mulss LCPI0_1(%rip)
+; CHECK: mulss
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_2(%rip)
+; CHECK: mulss
 ; CHECK: addss
-; CHECK: mulss LCPI0_3(%rip)
+; CHECK: mulss
 ; CHECK: addss
 ; CHECK: ret
 }
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 3f4f9ec..b7e69b8 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -465,14 +465,14 @@ bb5: ; preds = %bb3, %entry
 ; And the one at %bb68, where we want to be sure to use superhero mode:
 ; CHECK: BB10_10:
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
-; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
-; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: movaps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
 ; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
 ; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index 9506c9b..d7b0805 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -189,7 +189,7 @@ bb12:
 ; LINUX: call .L7$pb
 ; LINUX: .L7$pb:
 ; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
-; LINUX: addl .LJTI7_0@GOTOFF(
+; LINUX: .LJTI7_0@GOTOFF(
 ; LINUX: jmpl *
 ; LINUX: .LJTI7_0:
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 271ad1a..8ca0b12 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -9,14 +9,15 @@ target triple = "i686-apple-darwin8"
 define void @test({ double, double }* byval %z, double* %P) {
 entry:
+  %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
+  %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
+  volatile store double %tmp4, double* %P
   %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
-  %tmp1 = load double* %tmp, align 8 ; <double> [#uses=1]
+  %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1]
   %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
 ; CHECK: andpd{{.*}}4(%esp), %xmm
-  %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
-  %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
   %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
-  store double %tmp6, double* %P, align 8
+  volatile store double %tmp6, double* %P, align 8
   ret void
 }
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index d05dff8..107bdf9 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -2,9 +2,11 @@
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
-; CHECK: movl %edi, %eax
+; CHECK: movl 32(%rsp), %eax
 ; Move param %in1 to temp register (%r10d).
 ; CHECK: movl 40(%rsp), %r10d
+; Add %in1 %p1 to a different temporary register (%eax).
+; CHECK: addl %edi, %eax
 ; Move param %in2 to stack.
 ; CHECK: movl %r10d, 32(%rsp)
 ; Move result of addition to stack.