aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-04 19:28:31 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2012-07-04 19:28:31 +0000
commitb8720787015dc73d8a050b063366be6c3ad01946 (patch)
tree3d4a03ae7359619d56d097bf1644726d6735479e
parentf86c00f1f89082c800dbb78870fc5537eb3702f6 (diff)
downloadexternal_llvm-b8720787015dc73d8a050b063366be6c3ad01946.zip
external_llvm-b8720787015dc73d8a050b063366be6c3ad01946.tar.gz
external_llvm-b8720787015dc73d8a050b063366be6c3ad01946.tar.bz2
Ensure CopyToReg nodes are always glued to the call instruction.
The CopyToReg nodes that set up the argument registers before a call must be glued to the call instruction. Otherwise, the scheduler may emit the physreg copies long before the call, causing long live ranges for the fixed registers. Besides disabling good register allocation, that can also expose problems when EmitInstrWithCustomInserter() splits a basic block during the live range of a physreg. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159721 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp52
-rw-r--r--test/CodeGen/X86/crash.ll16
-rw-r--r--test/CodeGen/X86/tailcall-largecode.ll10
3 files changed, 34 insertions, 44 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ba66593..c8c5dae 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2304,27 +2304,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDValue InFlag;
- // Tail call byval lowering might overwrite argument registers so in case of
- // tail call optimization the copies to registers are lowered later.
- if (!isTailCall)
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
- Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc(), getPointerTy()),
- InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
+ DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy())));
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
@@ -2362,12 +2347,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
- DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
+ DAG.getConstant(NumXMMRegs, MVT::i8)));
}
-
// For tail calls lower the arguments to the 'real' stack slot.
if (isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
@@ -2381,8 +2364,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- // Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2422,19 +2403,20 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
- // Copy arguments to their registers.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag =SDValue();
-
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
FPDiff, dl);
}
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into registers.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2536,14 +2518,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- // Add an implicit use GOT pointer in EBX.
- if (!isTailCall && Subtarget->isPICStyleGOT())
- Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
-
- // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
- if (Is64Bit && isVarArg && !IsWin64)
- Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index c71c6ec..9badfc8 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -426,3 +426,19 @@ while.end: ; preds = %if.then.i256
return: ; preds = %entry
ret i64 -131
}
+
+; The tail call to a varargs function sets %AL.
+; uitofp expands to an FCMOV instruction which splits the basic block.
+; Make sure the live range of %AL isn't split.
+@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
+define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 {
+entry:
+ %x7 = load i64* %this, align 8
+ %sub = add i64 %x7, -1
+ %conv = uitofp i64 %sub to float
+ %div = fmul float %conv, 5.000000e-01
+ %conv2 = fpext float %div to double
+ tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
+ ret void
+}
+declare void @_Z6PrintFz(...)
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index c3f4278..e9b8721 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() {
; CHECK: pushq
; Pass the stack argument.
; CHECK: movl $7, 16(%rsp)
+; This is the large code model, so &manyargs_callee may not fit into
+; the jmp instruction. Put it into a register which won't be clobbered
+; while restoring callee-saved registers and won't be used for passing
+; arguments.
+; CHECK: movabsq $manyargs_callee, %rax
; Pass the register arguments, in the right registers.
; CHECK: movl $1, %edi
; CHECK: movl $2, %esi
@@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() {
; CHECK: movl $4, %ecx
; CHECK: movl $5, %r8d
; CHECK: movl $6, %r9d
-; This is the large code model, so &manyargs_callee may not fit into
-; the jmp instruction. Put it into R11, which won't be clobbered
-; while restoring callee-saved registers and won't be used for passing
-; arguments.
-; CHECK: movabsq $manyargs_callee, %rax
; Adjust the stack to "return".
; CHECK: popq
; And tail-call to the target.