diff options
author | Hans Wennborg <hans@hanshq.net> | 2012-06-01 16:27:21 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2012-06-01 16:27:21 +0000 |
commit | f0234fcbc9be9798c10dedc3e3c134b7afbc6511 (patch) | |
tree | 58aa8ee7a2be4159fbbbfe5085f56ea7eee31b1e | |
parent | 6bb5c0074dc4cede2ad8efd420ec91288f91b012 (diff) | |
download | external_llvm-f0234fcbc9be9798c10dedc3e3c134b7afbc6511.zip external_llvm-f0234fcbc9be9798c10dedc3e3c134b7afbc6511.tar.gz external_llvm-f0234fcbc9be9798c10dedc3e3c134b7afbc6511.tar.bz2 |
Implement the local-dynamic TLS model for x86 (PR3985)
This implements codegen support for accesses to thread-local variables
using the local-dynamic model, and adds a clean-up pass so that the base
address of the TLS block can be reused across local-dynamic accesses on
the same execution path.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157818 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86.h | 5 | ||||
-rw-r--r-- | lib/Target/X86/X86AsmPrinter.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 57 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 14 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 118 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 37 | ||||
-rw-r--r-- | lib/Target/X86/X86MachineFunctionInfo.h | 11 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 4 | ||||
-rw-r--r-- | test/CodeGen/X86/tls-local-dynamic.ll | 59 | ||||
-rw-r--r-- | test/CodeGen/X86/tls-pic.ll | 20 |
13 files changed, 353 insertions, 17 deletions
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 0c7f14d..f171240 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -100,6 +100,26 @@ namespace X86II { /// SYMBOL_LABEL @TLSGD MO_TLSGD, + /// MO_TLSLD - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS index for the module that + /// contains the symbol. When this index is passed to a call to to + /// __tls_get_addr, the function will return the base address of the TLS + /// block for the symbol. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSLD + MO_TLSLD, + + /// MO_TLSLDM - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS index for the module that + /// contains the symbol. When this index is passed to a call to to + /// ___tls_get_addr, the function will return the base address of the TLS + /// block for the symbol. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSLDM + MO_TLSLDM, + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// @@ -121,6 +141,13 @@ namespace X86II { /// SYMBOL_LABEL @TPOFF MO_TPOFF, + /// MO_DTPOFF - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS offset of the symbol. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @DTPOFF + MO_DTPOFF, + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index ecc7b59..bf05ccf 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -36,6 +36,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, /// register for PIC on x86-32. 
FunctionPass* createGlobalBaseRegPass(); +/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses +/// to local-dynamic TLS variables so that the TLS base address for the module +/// is only fetched once per execution path through the function. +FunctionPass *createCleanupLocalDynamicTLSPass(); + /// createX86FloatingPointStackifierPass - This function returns a pass which /// converts floating point register references and pseudo instructions into /// floating point stack references and physical instructions. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 6d97f46..d30c8df 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -186,9 +186,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, O << '-' << *MF->getPICBaseSymbol(); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; + case X86II::MO_TLSLD: O << "@TLSLD"; break; + case X86II::MO_TLSLDM: O << "@TLSLDM"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; case X86II::MO_TPOFF: O << "@TPOFF"; break; + case X86II::MO_DTPOFF: O << "@DTPOFF"; break; case X86II::MO_NTPOFF: O << "@NTPOFF"; break; case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break; case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f3b66e4..8d0e843 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7263,7 +7263,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags) { + unsigned char OperandFlags, bool LocalDynamic = false) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = 
GA->getDebugLoc(); @@ -7271,12 +7271,16 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, GA->getValueType(0), GA->getOffset(), OperandFlags); + + X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; + if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. @@ -7308,6 +7312,45 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } +static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT, + bool is64Bit) { + DebugLoc dl = GA->getDebugLoc(); + + // Get the start address of the TLS block for this module. + X86MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo<X86MachineFunctionInfo>(); + MFI->incNumLocalDynamicTLSAccesses(); + + SDValue Base; + if (is64Bit) { + Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, + X86II::MO_TLSLD, /*LocalDynamic=*/true); + } else { + SDValue InFlag; + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag); + InFlag = Chain.getValue(1); + Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, + X86II::MO_TLSLDM, /*LocalDynamic=*/true); + } + + // Note: the CleanupLocalDynamicTLSPass will remove redundant computations + // of Base. + + // Build x@dtpoff. 
+ unsigned char OperandFlags = X86II::MO_DTPOFF; + unsigned WrapperKind = X86ISD::Wrapper; + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), + GA->getOffset(), OperandFlags); + SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); + + // Add x@dtpoff with the base. + return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base); +} + // Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, @@ -7372,8 +7415,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { - // TODO: implement the "local dynamic" model - // If GV is an alias then use the aliasee for determining // thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) @@ -7383,11 +7424,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { switch (model) { case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::LocalDynamic: + return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(), + Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, @@ -11257,6 +11299,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; diff --git 
a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 534f6f7..6d6ff60 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -207,6 +207,10 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, + // TLSBASEADDR - Thread Local Storage. A call to get the start address + // of the TLS block for the current module. + TLSBASEADDR, + // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 810e820..99c2b8f 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -375,11 +375,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in + Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; +def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_base_addr32", + [(X86tlsbaseaddr tls32baseaddr:$sym)]>, + Requires<[In32BitMode]>; +} // All calls clobber the non-callee saved registers. 
RSP is marked as // a use to prevent stack-pointer assignments that appear immediately @@ -389,11 +394,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in + Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; +def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_base_addr64", + [(X86tlsbaseaddr tls64baseaddr:$sym)]>, + Requires<[In64BitMode]>; +} // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ae0c921..ad7521c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -3990,9 +3991,126 @@ namespace { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } + + private: + unsigned BaseReg; }; } char CGBR::ID = 0; FunctionPass* llvm::createGlobalBaseRegPass() { return new CGBR(); } + +namespace { + struct LDTLSCleanup : public MachineFunctionPass { + static char ID; + LDTLSCleanup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. 
+ return false; + } + + MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>(); + return VisitNode(DT->getRootNode(), 0); + } + + // Visit the dominator subtree rooted at Node in pre-order. + // If TLSBaseAddrReg is non-null, then use that to replace any + // TLS_base_addr instructions. Otherwise, create the register + // when the first such instruction is seen, and then use it + // as we encounter more instructions. + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + switch (I->getOpcode()) { + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + if (TLSBaseAddrReg) + I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); + I != E; ++I) { + Changed |= VisitNode(*I, TLSBaseAddrReg); + } + + return Changed; + } + + // Replace the TLS_base_addr instruction I with a copy from + // TLSBaseAddrReg, returning the new instruction. + MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF->getTarget()); + const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + // Insert a Copy from TLSBaseAddrReg to RAX/EAX. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + is64Bit ? X86::RAX : X86::EAX) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_base_addr instruction. 
+ I->eraseFromParent(); + + return Copy; + } + + // Create a virtal register in *TLSBaseAddrReg, and populate it by + // inserting a copy instruction after I. Returns the new instruction. + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF->getTarget()); + const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + // Create a virtual register for the TLS base address. + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit + ? &X86::GR64RegClass + : &X86::GR32RegClass); + + // Insert a copy from RAX/EAX to TLSBaseAddrReg. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + *TLSBaseAddrReg) + .addReg(is64Bit ? X86::RAX : X86::EAX); + + return Copy; + } + + virtual const char *getPassName() const { + return "Local Dynamic TLS Access Clean-up"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char LDTLSCleanup::ID = 0; +FunctionPass* +llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2756a9e..5cb2c80 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -97,6 +97,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; @@ -203,6 +205,9 @@ def X86WrapperRIP : 
SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; @@ -492,6 +497,9 @@ def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; +def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; + def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex, X86WrapperRIP], []>; @@ -499,6 +507,9 @@ def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; +def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", + [tglobaltlsaddr], []>; + //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
def HasCMov : Predicate<"Subtarget->hasCMov()">; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 2f368b6..9dc5c70 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -156,9 +156,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; + case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break; + case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break; + case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break; case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break; case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break; case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break; @@ -551,17 +554,38 @@ ReSimplify: static void LowerTlsAddr(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, const MachineInstr &MI) { - bool is64Bits = MI.getOpcode() == X86::TLS_addr64; + + bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || + MI.getOpcode() == X86::TLS_base_addr64; + + bool needsPadding = MI.getOpcode() == X86::TLS_addr64; + MCContext &context = OutStreamer.getContext(); - if (is64Bits) { + if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); } + + MCSymbolRefExpr::VariantKind SRVK; + switch (MI.getOpcode()) { + case X86::TLS_addr32: + case X86::TLS_addr64: + SRVK = MCSymbolRefExpr::VK_TLSGD; + break; + case X86::TLS_base_addr32: + SRVK = MCSymbolRefExpr::VK_TLSLDM; + break; + case X86::TLS_base_addr64: + SRVK = MCSymbolRefExpr::VK_TLSLD; + break; + default: + llvm_unreachable("unexpected 
opcode"); + } + MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); - const MCSymbolRefExpr *symRef = - MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context); + const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); MCInst LEA; if (is64Bits) { @@ -583,7 +607,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, } OutStreamer.EmitInstruction(LEA); - if (is64Bits) { + if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); @@ -645,6 +669,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TLS_addr32: case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: return LowerTlsAddr(OutStreamer, MCInstLowering, *MI); case X86::MOVPC32r: { @@ -714,4 +740,3 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); } - diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index c747109..2bc308d 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -66,6 +66,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// ArgumentStackSize - The number of bytes on stack consumed by the arguments /// being passed on the stack. unsigned ArgumentStackSize; + /// NumLocalDynamics - Number of local-dynamic TLS accesses. 
+ unsigned NumLocalDynamics; public: X86MachineFunctionInfo() : ForceFramePointer(false), @@ -79,7 +81,8 @@ public: RegSaveFrameIndex(0), VarArgsGPOffset(0), VarArgsFPOffset(0), - ArgumentStackSize(0) {} + ArgumentStackSize(0), + NumLocalDynamics(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -93,7 +96,8 @@ public: RegSaveFrameIndex(0), VarArgsGPOffset(0), VarArgsFPOffset(0), - ArgumentStackSize(0) {} + ArgumentStackSize(0), + NumLocalDynamics(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -130,6 +134,9 @@ public: unsigned getArgumentStackSize() const { return ArgumentStackSize; } void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 89c3884..c066a56 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -147,6 +147,10 @@ bool X86PassConfig::addInstSelector() { // Install an instruction selector. PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel())); + // For ELF, cleanup any local-dynamic TLS accesses. + if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) + PM->add(createCleanupLocalDynamicTLSPass()); + // For 32-bit, prepend instructions to set the "global base reg" for PIC. 
if (!getX86Subtarget().is64Bit()) PM->add(createGlobalBaseRegPass()); diff --git a/test/CodeGen/X86/tls-local-dynamic.ll b/test/CodeGen/X86/tls-local-dynamic.ll new file mode 100644 index 0000000..c5fd16b --- /dev/null +++ b/test/CodeGen/X86/tls-local-dynamic.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck %s + +@x = internal thread_local global i32 0, align 4 +@y = internal thread_local global i32 0, align 4 + +; get_x and get_y are here to prevent x and y to be optimized away as 0 + +define i32* @get_x() { +entry: + ret i32* @x +; FIXME: This function uses a single thread-local variable, +; so we might want to fall back to general-dynamic here. +; CHECK: get_x: +; CHECK: leaq x@TLSLD(%rip), %rdi +; CHECK-NEXT: callq __tls_get_addr@PLT +; CHECK: x@DTPOFF +} + +define i32* @get_y() { +entry: + ret i32* @y +} + +define i32 @f(i32 %i) { +entry: + %cmp = icmp eq i32 %i, 1 + br i1 %cmp, label %return, label %if.else +; This bb does not access TLS, so should not call __tls_get_addr. +; CHECK: f: +; CHECK-NOT: __tls_get_addr +; CHECK: je + + +if.else: + %0 = load i32* @x, align 4 + %cmp1 = icmp eq i32 %i, 2 + br i1 %cmp1, label %if.then2, label %return +; Now we call __tls_get_addr. +; CHECK: # %if.else +; CHECK: leaq x@TLSLD(%rip), %rdi +; CHECK-NEXT: callq __tls_get_addr@PLT +; CHECK: x@DTPOFF + + +if.then2: + %1 = load i32* @y, align 4 + %add = add nsw i32 %1, %0 + br label %return +; This accesses TLS, but is dominated by the previous block, +; so should not have to call __tls_get_addr again. 
+; CHECK: # %if.then2 +; CHECK-NOT: __tls_get_addr +; CHECK: y@DTPOFF + + +return: + %retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll index b83416d..c3e8216 100644 --- a/test/CodeGen/X86/tls-pic.ll +++ b/test/CodeGen/X86/tls-pic.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s @i = thread_local global i32 15 +@j = internal thread_local global i32 42 +@k = internal thread_local global i32 42 define i32 @f1() { entry: @@ -64,4 +66,22 @@ entry: ; X64: callq __tls_get_addr@PLT +define i32 @f5() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @k, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} +; X32: f5: +; X32: leal {{[jk]}}@TLSLDM +; X32-NEXT: calll ___tls_get_addr@PLT +; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax) +; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax) + +; X64: f5: +; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi +; X64-NEXT: callq __tls_get_addr@PLT +; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax) +; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax) |