author     Chris Lattner <sabre@nondot.org>    2010-03-11 00:22:57 +0000
committer  Chris Lattner <sabre@nondot.org>    2010-03-11 00:22:57 +0000
commit     29689434635364346cbef6f4e309f1d9fcdd5d80 (patch)
tree       2e33d188b2f1afbb93bd58e3fcd8e8d6e2db6521
parent     4d6113ee0699ac1eff46971881179de53e2eb810 (diff)
add support, testcases, and dox for the new GHC calling
convention. Patch by David Terei!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98212 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--   docs/CodeGenerator.html              3
-rw-r--r--   docs/LangRef.html                   26
-rw-r--r--   include/llvm/CallingConv.h           3
-rw-r--r--   lib/Target/X86/X86CallingConv.td    22
-rw-r--r--   lib/Target/X86/X86FastISel.cpp       6
-rw-r--r--   lib/Target/X86/X86ISelLowering.cpp  34
-rw-r--r--   lib/Target/X86/X86RegisterInfo.cpp  11
-rw-r--r--   test/CodeGen/X86/ghc-cc.ll          45
-rw-r--r--   test/CodeGen/X86/ghc-cc64.ll        86
9 files changed, 219 insertions, 17 deletions
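
Before the diff itself, a minimal IR sketch of what the patch enables (the function name @stg_loop and its arguments are illustrative, not taken from the patch): both caller and callee must carry cc 10, and under -tailcallopt such calls are lowered as jumps, which is what makes the convention usable for STG-style code.

; Hypothetical x86-32 example: a self-recursive STG-style loop.
; Per the CC_X86_32_GHC table added below, the two i32 arguments
; are passed in EBX and EBP.
define cc 10 void @stg_loop(i32 %sp, i32 %hp) nounwind {
entry:
  ; Caller and callee share cc 10, so with -tailcallopt this call
  ; is lowered as a jump and consumes no stack.
  tail call cc 10 void @stg_loop(i32 %sp, i32 %hp) nounwind
  ret void
}
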
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index f26d0ab..0568667 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1679,7 +1679,8 @@ $ llc -regalloc=linearscan file.bc -o ln.s;
 supported on x86/x86-64 and PowerPC. It is performed if:</p>
 
 <ul>
-  <li>Caller and callee have the calling convention <tt>fastcc</tt>.</li>
+  <li>Caller and callee have the calling convention <tt>fastcc</tt> or
+      <tt>cc 10</tt> (GHC call convention).</li>
 
   <li>The call is a tail call - in tail position (ret immediately follows call
       and ret uses value of call or is void).</li>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index eec06fc..adaf44a 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -691,9 +691,9 @@ define i32 @main() {   <i>; i32()* </i>
       target, without having to conform to an externally specified ABI
       (Application Binary Interface).
       <a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
-      when this convention is used.</a> This calling convention does not
-      support varargs and requires the prototype of all callees to exactly match
-      the prototype of the function definition.</dd>
+      when this or the GHC convention is used.</a> This calling convention
+      does not support varargs and requires the prototype of all callees to
+      exactly match the prototype of the function definition.</dd>
 
   <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
   <dd>This calling convention attempts to make code in the caller as efficient
@@ -703,6 +703,26 @@ define i32 @main() {   <i>; i32()* </i>
       does not support varargs and requires the prototype of all callees to
       exactly match the prototype of the function definition.</dd>
 
+  <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
+  <dd>This calling convention has been implemented specifically for use by the
+      <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
+      It passes everything in registers, going to extremes to achieve this by
+      disabling callee save registers. This calling convention should not be
+      used lightly but only for specific situations such as an alternative to
+      the <em>register pinning</em> performance technique often used when
+      implementing functional programming languages. At the moment only X86
+      supports this convention and it has the following limitations:
+      <ul>
+        <li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
+            floating point types are supported.</li>
+        <li>On <em>X86-64</em> only supports up to 10 bit type parameters and
+            6 floating point parameters.</li>
+      </ul>
+      This calling convention supports
+      <a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
+      requires that both the caller and callee use it.
+  </dd>
+
   <dt><b>"<tt>cc <<em>n</em>></tt>" - Numbered convention</b>:</dt>
   <dd>Any calling convention may be specified by number, allowing
       target-specific calling conventions to be used. Target specific calling
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index c54527c..624390d 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -44,6 +44,9 @@ namespace CallingConv {
     // call does not break any live ranges in the caller side.
     Cold = 9,
 
+    // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
+    GHC = 10,
+
     // Target - This is the start of the target-specific calling conventions,
     // e.g. fastcall and thiscall on X86.
     FirstTargetCC = 64,
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 12d3d04..fd15efd 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -221,6 +221,20 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
 
+def CC_X86_64_GHC : CallingConv<[
+  // Promote i8/i16/i32 arguments to i64.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+  CCIfType<[i64],
+           CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+  // Pass in STG registers: F1, F2, F3, F4, D1, D2
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+           CCIfSubtarget<"hasSSE1()",
+           CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // X86 C Calling Convention
 //===----------------------------------------------------------------------===//
@@ -320,3 +334,11 @@ def CC_X86_32_FastCC : CallingConv<[
   // Otherwise, same as everything else.
   CCDelegateTo<CC_X86_32_Common>
 ]>;
+
+def CC_X86_32_GHC : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1
+  CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 98e3f4e..27807f2 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -172,7 +172,9 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
 CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                            bool isTaillCall) {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -182,6 +184,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
     return CC_X86_32_FastCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
+  else if (CC == CallingConv::GHC)
+    return CC_X86_32_GHC;
   else
     return CC_X86_32_C;
 }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1416a6d..9b7f6fc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1378,6 +1378,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
     return !Subtarget->is64Bit();
   case CallingConv::Fast:
     return GuaranteedTailCallOpt;
+  case CallingConv::GHC:
+    return GuaranteedTailCallOpt;
   }
 }
 
@@ -1385,7 +1387,9 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
 /// given CallingConvention value.
 CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -1395,6 +1399,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
     return CC_X86_32_FastCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
+  else if (CC == CallingConv::GHC)
+    return CC_X86_32_GHC;
   else
     return CC_X86_32_C;
 }
@@ -1412,10 +1418,16 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        /*AlwaysInline=*/true, NULL, 0, NULL, 0);
 }
 
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization.
+static bool IsTailCallConvention(CallingConv::ID CC) {
+  return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
 /// a tailcall target by changing its ABI.
 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
-  return GuaranteedTailCallOpt && CC == CallingConv::Fast;
+  return GuaranteedTailCallOpt && IsTailCallConvention(CC);
 }
 
 SDValue
@@ -1479,8 +1491,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   bool Is64Bit = Subtarget->is64Bit();
   bool IsWin64 = Subtarget->isTargetWin64();
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1683,7 +1695,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   } else {
     BytesToPopOnReturn = 0; // Callee pops nothing.
     // If this is an sret function, the return should pop the hidden pointer.
-    if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
+    if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
       BytesToPopOnReturn = 4;
   }
 
@@ -1779,8 +1791,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       ++NumTailCalls;
   }
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1794,7 +1806,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // This is a sibcall. The memory operands are available in caller's
     // own caller's stack.
     NumBytes = 0;
-  else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast)
+  else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
@@ -2150,7 +2162,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   unsigned NumBytesForCalleeToPush;
   if (IsCalleePop(isVarArg, CallConv))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
+  else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2288,14 +2300,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     SelectionDAG& DAG) const {
-  if (CalleeCC != CallingConv::Fast &&
+  if (!IsTailCallConvention(CalleeCC) &&
       CalleeCC != CallingConv::C)
     return false;
 
   // If -tailcallopt is specified, make fastcc functions tail-callable.
   const Function *CallerF = DAG.getMachineFunction().getFunction();
   if (GuaranteedTailCallOpt) {
-    if (CalleeCC == CallingConv::Fast &&
+    if (IsTailCallConvention(CalleeCC) &&
         CallerF->getCallingConv() == CalleeCC)
       return true;
     return false;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cdb579c..3c087bd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -294,13 +294,20 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
 const unsigned *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool callsEHReturn = false;
+  bool ghcCall = false;
 
   if (MF) {
     const MachineFrameInfo *MFI = MF->getFrameInfo();
     const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
     callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+    const Function *F = MF->getFunction();
+    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
   }
 
+  static const unsigned GhcCalleeSavedRegs[] = {
+    0
+  };
+
   static const unsigned CalleeSavedRegs32Bit[] = {
     X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
   };
@@ -326,7 +333,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     X86::XMM14, X86::XMM15, 0
   };
 
-  if (Is64Bit) {
+  if (ghcCall) {
+    return GhcCalleeSavedRegs;
+  } else if (Is64Bit) {
     if (IsWin64)
       return CalleeSavedRegsWin64;
     else
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
new file mode 100644
index 0000000..9393cf5
--- /dev/null
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -tailcallopt -mtriple=i686-linux-gnu | FileCheck %s
+
+; Test the GHC call convention works (x86-32)
+
+@base = external global i32 ; assigned to register: EBX
+@sp   = external global i32 ; assigned to register: EBP
+@hp   = external global i32 ; assigned to register: EDI
+@r1   = external global i32 ; assigned to register: ESI
+
+define void @zap(i32 %a, i32 %b) nounwind {
+entry:
+  ; CHECK: movl {{[0-9]*}}(%esp), %ebx
+  ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
+  ; CHECK-NEXT: call addtwo
+  %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
+  ; CHECK: call foo
+  call void @foo() nounwind
+  ret void
+}
+
+define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind {
+entry:
+  ; CHECK: leal (%ebx,%ebp), %eax
+  %0 = add i32 %x, %y
+  ; CHECK-NEXT: ret
+  ret i32 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+  ; CHECK: movl base, %ebx
+  ; CHECK-NEXT: movl sp, %ebp
+  ; CHECK-NEXT: movl hp, %edi
+  ; CHECK-NEXT: movl r1, %esi
+  %0 = load i32* @r1
+  %1 = load i32* @hp
+  %2 = load i32* @sp
+  %3 = load i32* @base
+  ; CHECK: jmp bar
+  tail call cc 10 void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
+  ret void
+}
+
+declare cc 10 void @bar(i32, i32, i32, i32)
+
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
new file mode 100644
index 0000000..fcf7e17
--- /dev/null
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; Check the GHC call convention works (x86-64)
+
+@base  = external global i64 ; assigned to register: R13
+@sp    = external global i64 ; assigned to register: RBP
+@hp    = external global i64 ; assigned to register: R12
+@r1    = external global i64 ; assigned to register: RBX
+@r2    = external global i64 ; assigned to register: R14
+@r3    = external global i64 ; assigned to register: RSI
+@r4    = external global i64 ; assigned to register: RDI
+@r5    = external global i64 ; assigned to register: R8
+@r6    = external global i64 ; assigned to register: R9
+@splim = external global i64 ; assigned to register: R15
+
+@f1 = external global float  ; assigned to register: XMM1
+@f2 = external global float  ; assigned to register: XMM2
+@f3 = external global float  ; assigned to register: XMM3
+@f4 = external global float  ; assigned to register: XMM4
+@d1 = external global double ; assigned to register: XMM5
+@d2 = external global double ; assigned to register: XMM6
+
+define void @zap(i64 %a, i64 %b) nounwind {
+entry:
+  ; CHECK: movq %rdi, %r13
+  ; CHECK-NEXT: movq %rsi, %rbp
+  ; CHECK-NEXT: callq addtwo
+  %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
+  ; CHECK: callq foo
+  call void @foo() nounwind
+  ret void
+}
+
+define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
+entry:
+  ; CHECK: leaq (%r13,%rbp), %rax
+  %0 = add i64 %x, %y
+  ; CHECK-NEXT: ret
+  ret i64 %0
+}
+
+define cc 10 void @foo() nounwind {
+entry:
+  ; CHECK: movq base(%rip), %r13
+  ; CHECK-NEXT: movq sp(%rip), %rbp
+  ; CHECK-NEXT: movq hp(%rip), %r12
+  ; CHECK-NEXT: movq r1(%rip), %rbx
+  ; CHECK-NEXT: movq r2(%rip), %r14
+  ; CHECK-NEXT: movq r3(%rip), %rsi
+  ; CHECK-NEXT: movq r4(%rip), %rdi
+  ; CHECK-NEXT: movq r5(%rip), %r8
+  ; CHECK-NEXT: movq r6(%rip), %r9
+  ; CHECK-NEXT: movq splim(%rip), %r15
+  ; CHECK-NEXT: movss f1(%rip), %xmm1
+  ; CHECK-NEXT: movss f2(%rip), %xmm2
+  ; CHECK-NEXT: movss f3(%rip), %xmm3
+  ; CHECK-NEXT: movss f4(%rip), %xmm4
+  ; CHECK-NEXT: movsd d1(%rip), %xmm5
+  ; CHECK-NEXT: movsd d2(%rip), %xmm6
+  %0  = load double* @d2
+  %1  = load double* @d1
+  %2  = load float* @f4
+  %3  = load float* @f3
+  %4  = load float* @f2
+  %5  = load float* @f1
+  %6  = load i64* @splim
+  %7  = load i64* @r6
+  %8  = load i64* @r5
+  %9  = load i64* @r4
+  %10 = load i64* @r3
+  %11 = load i64* @r2
+  %12 = load i64* @r1
+  %13 = load i64* @hp
+  %14 = load i64* @sp
+  %15 = load i64* @base
+  ; CHECK: jmp bar
+  tail call cc 10 void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
+                             i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
+                             float %5, float %4, float %3, float %2, double %1,
+                             double %0 ) nounwind
+  ret void
+}
+
+declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
+                        float, float, float, float, double, double)
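
Beyond the cc 10 syntax exercised by the tests above, a front end can select the convention programmatically. The sketch below is not part of the patch and the helper name is hypothetical; CallingConv::GHC is the enumerator added in include/llvm/CallingConv.h, and setCallingConv/setTailCall are the standard llvm::Function and llvm::CallInst accessors of this period.

#include "llvm/CallingConv.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"

using namespace llvm;

// Hypothetical helper: mark a function definition and one of its call
// sites as using the GHC convention. The convention must match on both
// sides; per LangRef, a convention mismatch between a call and its
// callee is undefined behavior.
static void useGHCConvention(Function *Callee, CallInst *Call) {
  Callee->setCallingConv(CallingConv::GHC); // definition side
  Call->setCallingConv(CallingConv::GHC);   // call-site side
  Call->setTailCall(true);                  // enables the jump lowering under -tailcallopt
}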