aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-03-11 00:22:57 +0000
committerChris Lattner <sabre@nondot.org>2010-03-11 00:22:57 +0000
commit29689434635364346cbef6f4e309f1d9fcdd5d80 (patch)
tree2e33d188b2f1afbb93bd58e3fcd8e8d6e2db6521
parent4d6113ee0699ac1eff46971881179de53e2eb810 (diff)
downloadexternal_llvm-29689434635364346cbef6f4e309f1d9fcdd5d80.zip
external_llvm-29689434635364346cbef6f4e309f1d9fcdd5d80.tar.gz
external_llvm-29689434635364346cbef6f4e309f1d9fcdd5d80.tar.bz2
add support, testcases, and dox for the new GHC calling
convention. Patch by David Terei! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98212 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--docs/CodeGenerator.html3
-rw-r--r--docs/LangRef.html26
-rw-r--r--include/llvm/CallingConv.h3
-rw-r--r--lib/Target/X86/X86CallingConv.td22
-rw-r--r--lib/Target/X86/X86FastISel.cpp6
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp34
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp11
-rw-r--r--test/CodeGen/X86/ghc-cc.ll45
-rw-r--r--test/CodeGen/X86/ghc-cc64.ll86
9 files changed, 219 insertions, 17 deletions
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index f26d0ab..0568667 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1679,7 +1679,8 @@ $ llc -regalloc=linearscan file.bc -o ln.s;
supported on x86/x86-64 and PowerPC. It is performed if:</p>
<ul>
- <li>Caller and callee have the calling convention <tt>fastcc</tt>.</li>
+ <li>Caller and callee have the calling convention <tt>fastcc</tt> or
+ <tt>cc 10</tt> (GHC call convention).</li>
<li>The call is a tail call - in tail position (ret immediately follows call
and ret uses value of call or is void).</li>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index eec06fc..adaf44a 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -691,9 +691,9 @@ define i32 @main() { <i>; i32()* </i>
target, without having to conform to an externally specified ABI
(Application Binary Interface).
<a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
- when this convention is used.</a> This calling convention does not
- support varargs and requires the prototype of all callees to exactly match
- the prototype of the function definition.</dd>
+ when this or the GHC convention is used.</a> This calling convention
+ does not support varargs and requires the prototype of all callees to
+ exactly match the prototype of the function definition.</dd>
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
<dd>This calling convention attempts to make code in the caller as efficient
@@ -703,6 +703,26 @@ define i32 @main() { <i>; i32()* </i>
does not support varargs and requires the prototype of all callees to
exactly match the prototype of the function definition.</dd>
+ <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
+ <dd>This calling convention has been implemented specifically for use by the
+ <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
+ It passes everything in registers, going to extremes to achieve this by
+ disabling callee save registers. This calling convention should not be
+ used lightly but only for specific situations such as an alternative to
+ the <em>register pinning</em> performance technique often used when
+ implementing functional programming languages.At the moment only X86
+ supports this convention and it has the following limitations:
+ <ul>
+ <li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
+ floating point types are supported.</li>
+ <li>On <em>X86-64</em> only supports up to 10 bit type parameters and
+ 6 floating point parameters.</li>
+ </ul>
+ This calling convention supports
+ <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+ requires both the caller and callee are using it.
+ </dd>
+
<dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
<dd>Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific calling
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index c54527c..624390d 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -44,6 +44,9 @@ namespace CallingConv {
// call does not break any live ranges in the caller side.
Cold = 9,
+ // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
+ GHC = 10,
+
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 12d3d04..fd15efd 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -221,6 +221,20 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
+def CC_X86_64_GHC : CallingConv<[
+ // Promote i8/i16/i32 arguments to i64.
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+ CCIfType<[i64],
+ CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+ // Pass in STG registers: F1, F2, F3, F4, D1, D2
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()",
+ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -320,3 +334,11 @@ def CC_X86_32_FastCC : CallingConv<[
// Otherwise, same as everything else.
CCDelegateTo<CC_X86_32_Common>
]>;
+
+def CC_X86_32_GHC : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Pass in STG registers: Base, Sp, Hp, R1
+ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 98e3f4e..27807f2 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -172,7 +172,9 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
bool isTaillCall) {
if (Subtarget->is64Bit()) {
- if (Subtarget->isTargetWin64())
+ if (CC == CallingConv::GHC)
+ return CC_X86_64_GHC;
+ else if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else
return CC_X86_64_C;
@@ -182,6 +184,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
+ else if (CC == CallingConv::GHC)
+ return CC_X86_32_GHC;
else
return CC_X86_32_C;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1416a6d..9b7f6fc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1378,6 +1378,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
return !Subtarget->is64Bit();
case CallingConv::Fast:
return GuaranteedTailCallOpt;
+ case CallingConv::GHC:
+ return GuaranteedTailCallOpt;
}
}
@@ -1385,7 +1387,9 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
/// given CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (Subtarget->is64Bit()) {
- if (Subtarget->isTargetWin64())
+ if (CC == CallingConv::GHC)
+ return CC_X86_64_GHC;
+ else if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
else
return CC_X86_64_C;
@@ -1395,6 +1399,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
return CC_X86_32_FastCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
+ else if (CC == CallingConv::GHC)
+ return CC_X86_32_GHC;
else
return CC_X86_32_C;
}
@@ -1412,10 +1418,16 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/*AlwaysInline=*/true, NULL, 0, NULL, 0);
}
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+ return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
- return GuaranteedTailCallOpt && CC == CallingConv::Fast;
+ return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
SDValue
@@ -1479,8 +1491,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
- assert(!(isVarArg && CallConv == CallingConv::Fast) &&
- "Var args not supported with calling convention fastcc");
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -1683,7 +1695,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
} else {
BytesToPopOnReturn = 0; // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
BytesToPopOnReturn = 4;
}
@@ -1779,8 +1791,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
++NumTailCalls;
}
- assert(!(isVarArg && CallConv == CallingConv::Fast) &&
- "Var args not supported with calling convention fastcc");
+ assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ "Var args not supported with calling convention fastcc or ghc");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -1794,7 +1806,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast)
+ else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2150,7 +2162,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
unsigned NumBytesForCalleeToPush;
if (IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2288,14 +2300,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (CalleeCC != CallingConv::Fast &&
+ if (!IsTailCallConvention(CalleeCC) &&
CalleeCC != CallingConv::C)
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
const Function *CallerF = DAG.getMachineFunction().getFunction();
if (GuaranteedTailCallOpt) {
- if (CalleeCC == CallingConv::Fast &&
+ if (IsTailCallConvention(CalleeCC) &&
CallerF->getCallingConv() == CalleeCC)
return true;
return false;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cdb579c..3c087bd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -294,13 +294,20 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
const unsigned *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
+ bool ghcCall = false;
if (MF) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+ const Function *F = MF->getFunction();
+ ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
}
+ static const unsigned GhcCalleeSavedRegs[] = {
+ 0
+ };
+
static const unsigned CalleeSavedRegs32Bit[] = {
X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
};
@@ -326,7 +333,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
X86::XMM14, X86::XMM15, 0
};
- if (Is64Bit) {
+ if (ghcCall) {
+ return GhcCalleeSavedRegs;
+ } else if (Is64Bit) {
if (IsWin64)
return CalleeSavedRegsWin64;
else
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
new file mode 100644
index 0000000..9393cf5
--- /dev/null
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -tailcallopt -mtriple=i686-linux-gnu | FileCheck %s
+
+; Test the GHC call convention works (x86-32)
+
+@base = external global i32 ; assigned to register: EBX
+@sp = external global i32 ; assigned to register: EBP
+@hp = external global i32 ; assigned to register: EDI
+@r1 = external global i32 ; assigned to register: ESI
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+ ; CHECK: movl {{[0-9]*}}(%esp), %ebx
+ ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
+ ; CHECK-NEXT: call addtwo
+ %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
+ ; CHECK: call foo
+ call void @foo() nounwind
+ ret void
+}
+
+define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind {
+entry:
+ ; CHECK: leal (%ebx,%ebp), %eax
+ %0 = add i32 %x, %y
+ ; CHECK-NEXT: ret
+ ret i32 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+ ; CHECK: movl base, %ebx
+ ; CHECK-NEXT: movl sp, %ebp
+ ; CHECK-NEXT: movl hp, %edi
+ ; CHECK-NEXT: movl r1, %esi
+ %0 = load i32* @r1
+ %1 = load i32* @hp
+ %2 = load i32* @sp
+ %3 = load i32* @base
+ ; CHECK: jmp bar
+ tail call cc 10 void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
+ ret void
+}
+
+declare cc 10 void @bar(i32, i32, i32, i32)
+
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
new file mode 100644
index 0000000..fcf7e17
--- /dev/null
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Check the GHC call convention works (x86-64)
+
+@base = external global i64 ; assigned to register: R13
+@sp = external global i64 ; assigned to register: RBP
+@hp = external global i64 ; assigned to register: R12
+@r1 = external global i64 ; assigned to register: RBX
+@r2 = external global i64 ; assigned to register: R14
+@r3 = external global i64 ; assigned to register: RSI
+@r4 = external global i64 ; assigned to register: RDI
+@r5 = external global i64 ; assigned to register: R8
+@r6 = external global i64 ; assigned to register: R9
+@splim = external global i64 ; assigned to register: R15
+
+@f1 = external global float ; assigned to register: XMM1
+@f2 = external global float ; assigned to register: XMM2
+@f3 = external global float ; assigned to register: XMM3
+@f4 = external global float ; assigned to register: XMM4
+@d1 = external global double ; assigned to register: XMM5
+@d2 = external global double ; assigned to register: XMM6
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+ ; CHECK: movq %rdi, %r13
+ ; CHECK-NEXT: movq %rsi, %rbp
+ ; CHECK-NEXT: callq addtwo
+ %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
+ ; CHECK: callq foo
+ call void @foo() nounwind
+ ret void
+}
+
+define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
+entry:
+ ; CHECK: leaq (%r13,%rbp), %rax
+ %0 = add i64 %x, %y
+ ; CHECK-NEXT: ret
+ ret i64 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+ ; CHECK: movq base(%rip), %r13
+ ; CHECK-NEXT: movq sp(%rip), %rbp
+ ; CHECK-NEXT: movq hp(%rip), %r12
+ ; CHECK-NEXT: movq r1(%rip), %rbx
+ ; CHECK-NEXT: movq r2(%rip), %r14
+ ; CHECK-NEXT: movq r3(%rip), %rsi
+ ; CHECK-NEXT: movq r4(%rip), %rdi
+ ; CHECK-NEXT: movq r5(%rip), %r8
+ ; CHECK-NEXT: movq r6(%rip), %r9
+ ; CHECK-NEXT: movq splim(%rip), %r15
+ ; CHECK-NEXT: movss f1(%rip), %xmm1
+ ; CHECK-NEXT: movss f2(%rip), %xmm2
+ ; CHECK-NEXT: movss f3(%rip), %xmm3
+ ; CHECK-NEXT: movss f4(%rip), %xmm4
+ ; CHECK-NEXT: movsd d1(%rip), %xmm5
+ ; CHECK-NEXT: movsd d2(%rip), %xmm6
+ %0 = load double* @d2
+ %1 = load double* @d1
+ %2 = load float* @f4
+ %3 = load float* @f3
+ %4 = load float* @f2
+ %5 = load float* @f1
+ %6 = load i64* @splim
+ %7 = load i64* @r6
+ %8 = load i64* @r5
+ %9 = load i64* @r4
+ %10 = load i64* @r3
+ %11 = load i64* @r2
+ %12 = load i64* @r1
+ %13 = load i64* @hp
+ %14 = load i64* @sp
+ %15 = load i64* @base
+ ; CHECK: jmp bar
+ tail call cc 10 void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
+ i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
+ float %5, float %4, float %3, float %2, double %1,
+ double %0 ) nounwind
+ ret void
+}
+
+declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
+ float, float, float, float, double, double)
+