-rw-r--r--  include/llvm/Target/TargetLowering.h     |   8
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp        |  10
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h          |   2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp        |  13
-rw-r--r--  lib/Target/X86/X86ISelLowering.h          |   2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp  | 103
-rw-r--r--  test/CodeGen/X86/tailcall-cgp-dup.ll      |  63
7 files changed, 197 insertions, 4 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 5b3f2b3..6b3c45a 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1287,6 +1287,14 @@ public:
     return false;
   }
 
+  /// mayBeEmittedAsTailCall - Return true if the target may be able emit the
+  /// call instruction as a tail call. This is used by optimization passes to
+  /// determine if it's profitable to duplicate return instructions to enable
+  /// tailcall optimization.
+  virtual bool mayBeEmittedAsTailCall(CallInst *CI) const {
+    return false;
+  }
+
   /// getTypeForExtArgOrReturn - Return the type that should be used to zero or
   /// sign extend a zeroext/signext integer argument or return value.
   /// FIXME: Most C calling convention requires the return type to be promoted,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 35a9bf7..891ea62 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1805,6 +1805,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
   return HasRet;
 }
 
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!EnableARMTailCalls)
+    return false;
+
+  if (!CI->isTailCall())
+    return false;
+
+  return !Subtarget->isThumb1Only();
+}
+
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
 // one of the above mentioned nodes. It has to be wrapped because otherwise
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 402e1c6..e09c1da 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -457,6 +457,8 @@ namespace llvm {
 
     virtual bool isUsedByReturnOnly(SDNode *N) const;
 
+    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                       SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
     SDValue getVFPCmp(SDValue LHS, SDValue RHS,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 576c879..58acf4f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,6 +45,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1595,6 +1596,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
   return (CC == CallingConv::Fast || CC == CallingConv::GHC);
 }
 
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!CI->isTailCall())
+    return false;
+
+  CallSite CS(CI);
+  CallingConv::ID CalleeCC = CS.getCallingConv();
+  if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+    return false;
+
+  return true;
+}
+
 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
 /// a tailcall target by changing its ABI.
 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7c1b13a..6301057 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -843,6 +843,8 @@ namespace llvm {
 
     virtual bool isUsedByReturnOnly(SDNode *N) const;
 
+    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
     virtual EVT
     getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                              ISD::NodeType ExtendKind) const;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f0babcc..eb80f5c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -47,16 +47,17 @@ using namespace llvm;
 using namespace llvm::PatternMatch;
 
 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
-STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
-STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
+STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
 STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                       "sunken Cmps");
 STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                        "of sunken Casts");
 STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                           "computations were sunk");
-STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
-STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
 
 static cl::opt<bool> DisableBranchOpts(
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -104,6 +105,7 @@ namespace {
     bool OptimizeCallInst(CallInst *CI);
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
+    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
   };
 }
 
@@ -547,6 +549,96 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   return Simplifier.fold(CI, TD);
 }
 
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   br label %return
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   br label %return
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   br label %return
+/// return:
+///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+///   ret i32 %retval
+///
+/// =>
+///
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   ret i32 %tmp0
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   ret i32 %tmp1
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   ret i32 %tmp2
+///
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
+  Value *V = RI->getReturnValue();
+  if (!V)
+    return false;
+
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    BasicBlock *BB = RI->getParent();
+    if (PN->getParent() != BB)
+      return false;
+
+    // It's not safe to eliminate the sign / zero extension of the return value.
+    // See llvm::isInTailCallPosition().
+    const Function *F = BB->getParent();
+    unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+    if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+      return false;
+
+    // Make sure there are no instructions between PHI and return.
+    BasicBlock::iterator BI = PN;
+    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+    if (&*BI != RI)
+      return false;
+
+    /// Only dup the ReturnInst if the CallInst is likely to be emitted as a
+    /// tail call.
+    SmallVector<CallInst*, 4> TailCalls;
+    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+      if (CI && TLI->mayBeEmittedAsTailCall(CI))
+        TailCalls.push_back(CI);
+    }
+
+    bool Changed = false;
+    for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+      CallInst *CI = TailCalls[i];
+      CallSite CS(CI);
+
+      // Conservatively require the attributes of the call to match those of
+      // the return. Ignore noalias because it doesn't affect the call sequence.
+      unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+      if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+        continue;
+
+      // Make sure the call instruction is followed by an unconditional branch
+      // to the return block.
+      BasicBlock *CallBB = CI->getParent();
+      BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+      if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+        continue;
+
+      // Duplicate the return into CallBB.
+      (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+      Changed = true;
+      ++NumRetsDup;
+    }
+
+    return Changed;
+  }
+
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
@@ -970,6 +1062,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
   if (CallInst *CI = dyn_cast<CallInst>(I))
     return OptimizeCallInst(CI);
 
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+    return DupRetToEnableTailCallOpts(RI);
+
   return false;
 }
 
diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll
new file mode 100644
index 0000000..10fe146
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Teach CGP to dup returns to enable tail call optimization.
+; rdar://9147433
+
+define i32 @foo(i32 %x) nounwind ssp {
+; CHECK: foo:
+entry:
+  switch i32 %x, label %return [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb1
+    i32 3, label %sw.bb3
+    i32 4, label %sw.bb5
+    i32 5, label %sw.bb7
+    i32 6, label %sw.bb9
+  ]
+
+sw.bb:                                            ; preds = %entry
+; CHECK: jmp _f1
+  %call = tail call i32 @f1() nounwind
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+; CHECK: jmp _f2
+  %call2 = tail call i32 @f2() nounwind
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+; CHECK: jmp _f3
+  %call4 = tail call i32 @f3() nounwind
+  br label %return
+
+sw.bb5:                                           ; preds = %entry
+; CHECK: jmp _f4
+  %call6 = tail call i32 @f4() nounwind
+  br label %return
+
+sw.bb7:                                           ; preds = %entry
+; CHECK: jmp _f5
+  %call8 = tail call i32 @f5() nounwind
+  br label %return
+
+sw.bb9:                                           ; preds = %entry
+; CHECK: jmp _f6
+  %call10 = tail call i32 @f6() nounwind
+  br label %return
+
+return:                                           ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
+  %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+declare i32 @f1()
+
+declare i32 @f2()
+
+declare i32 @f3()
+
+declare i32 @f4()
+
+declare i32 @f5()
+
+declare i32 @f6()
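
Note on the sign/zero-extension bail-out in DupRetToEnableTailCallOpts: the sketch
below is a minimal, hypothetical IR example (the names @bar, @g0, and @g1 are not
part of this patch) of a case the pass deliberately skips. Because @bar carries a
signext return attribute, CallerRetAttr contains Attribute::SExt, so the pass
returns false before examining the incoming calls and the ret is not duplicated
into %bb0 or %bb1.

; Hypothetical example (not part of this patch): the caller returns
; "signext i8", so duplicating the return could drop the extension the
; caller's ABI requires; DupRetToEnableTailCallOpts leaves this alone.
define signext i8 @bar(i1 %c) nounwind {
entry:
  br i1 %c, label %bb0, label %bb1

bb0:
  %t0 = tail call i8 @g0() nounwind
  br label %return

bb1:
  %t1 = tail call i8 @g1() nounwind
  br label %return

return:
  %r = phi i8 [ %t0, %bb0 ], [ %t1, %bb1 ]
  ret i8 %r
}

declare i8 @g0()
declare i8 @g1()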