author     Dan Gohman <gohman@apple.com>              2010-10-12 18:00:49 +0000
committer  Dan Gohman <gohman@apple.com>              2010-10-12 18:00:49 +0000
commit     320afb8c818b5cd5b9d4fcd0dba83ba3384ed4b4 (patch)
tree       e9f21f0a487672547e31382ef3144e7f392a6035 /lib/Target/X86
parent     8e157302f4991b08a625b05238e01d12c82a2976 (diff)
Initial va_arg support for x86-64. Patch by David Meyer!
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116319 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  324
-rw-r--r--  lib/Target/X86/X86ISelLowering.h     11
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td   11
-rw-r--r--  lib/Target/X86/X86InstrInfo.td       11
4 files changed, 351 insertions, 6 deletions
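
For orientation before the diff: the patch lowers the IR-level va_arg on x86-64 against the System V va_list record described in the patch's own comments (gp_offset, fp_offset, overflow_area, reg_save_area). The following is an illustrative C++ sketch of that record and of the gp_offset path the generated code takes for an integer argument; it is not code from this commit, and the struct and function names are invented for the example.

#include <cstdint>

// Hypothetical illustration only -- not part of this commit.
struct VaList64 {
  uint32_t gp_offset;      // offset into reg_save_area for the next GPR argument
  uint32_t fp_offset;      // offset into reg_save_area for the next XMM argument
  void    *overflow_area;  // next stack-passed (overflow) argument
  void    *reg_save_area;  // saved register arguments: 6 GPRs, then 8 XMMs
};                         // sizeof == 24, alignment == 8, as the patch comments note

// Roughly what the emitted machine code does for an 8-byte integer argument
// (ArgMode == 1, i.e. the gp_offset path).
void *nextIntArgAddress(VaList64 &ap) {
  const uint32_t MaxGPOffset = 6 * 8;  // TotalNumIntRegs * 8
  if (ap.gp_offset < MaxGPOffset) {    // fall through to "offsetMBB"
    void *addr = static_cast<char *>(ap.reg_save_area) + ap.gp_offset;
    ap.gp_offset += 8;                 // advance to the next GPR slot
    return addr;
  }
  // "overflowMBB": pull from the stack and keep the pointer 8-byte aligned.
  void *addr = ap.overflow_area;
  ap.overflow_area = static_cast<char *>(ap.overflow_area) + 8;
  return addr;
}

The floating-point path is analogous, using fp_offset, a 176-byte bound (6*8 + 8*16), and 16-byte steps, which matches the UseFPOffset handling in the code below.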
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1ab2f2b..3e0d100 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7570,11 +7570,68 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  assert(Subtarget->is64Bit() &&
+         "LowerVAARG only handles 64-bit va_arg!");
+  assert((Subtarget->isTargetLinux() ||
+          Subtarget->isTargetDarwin()) &&
+         "Unhandled target in LowerVAARG");
+  assert(Op.getNode()->getNumOperands() == 4);
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  unsigned Align = Op.getConstantOperandVal(3);
+  DebugLoc dl = Op.getDebugLoc();
 
-  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
-  return SDValue();
+  EVT ArgVT = Op.getNode()->getValueType(0);
+  const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
+  uint8_t ArgMode;
+
+  // Decide which area this value should be read from.
+  // TODO: Implement the AMD64 ABI in its entirety. This simple
+  // selection mechanism works only for the basic types.
+  if (ArgVT == MVT::f80) {
+    llvm_unreachable("va_arg for f80 not yet implemented");
+  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
+  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
+  } else {
+    llvm_unreachable("Unhandled argument type in LowerVAARG");
+  }
+
+  if (ArgMode == 2) {
+    // Sanity Check: Make sure using fp_offset makes sense.
+    const Function *Fn = DAG.getMachineFunction().getFunction();
+    bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+    assert(!UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE1());
+  }
+
+  // Insert VAARG_64 node into the DAG
+  // VAARG_64 returns two values: Variable Argument Address, Chain
+  SmallVector<SDValue, 11> InstOps;
+  InstOps.push_back(Chain);
+  InstOps.push_back(SrcPtr);
+  InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+  InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+                                          VTs, &InstOps[0], InstOps.size(),
+                                          MVT::i64,
+                                          MachinePointerInfo(SV),
+                                          /*Align=*/0,
+                                          /*Volatile=*/false,
+                                          /*ReadMem=*/true,
+                                          /*WriteMem=*/true);
+  Chain = VAARG.getValue(1);
+
+  // Load the next argument and return it
+  return DAG.getLoad(ArgVT, dl,
+                     Chain,
+                     VAARG,
+                     MachinePointerInfo(),
+                     false, false, 0);
 }
 
 SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -8850,6 +8907,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
   case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
   }
 }
@@ -9411,6 +9469,261 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
 }
 
 MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+                   MachineInstr *MI,
+                   MachineBasicBlock *MBB) const {
+  // Emit va_arg instruction on X86-64.
+
+  // Operands to this pseudo-instruction:
+  //   0  ) Output  : destination address (reg)
+  //   1-5) Input   : va_list address (addr, i64mem)
+  //   6  ) ArgSize : Size (in bytes) of vararg type
+  //   7  ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+  //   8  ) Align   : Alignment of type
+  //   9  ) EFLAGS (implicit-def)
+
+  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Scale = MI->getOperand(2);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Disp = MI->getOperand(4);
+  MachineOperand &Segment = MI->getOperand(5);
+  unsigned ArgSize = MI->getOperand(6).getImm();
+  unsigned ArgMode = MI->getOperand(7).getImm();
+  unsigned Align = MI->getOperand(8).getImm();
+
+  // Memory Reference
+  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  // Machine Information
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+  DebugLoc DL = MI->getDebugLoc();
+
+  // struct va_list {
+  //   i32   gp_offset
+  //   i32   fp_offset
+  //   i64   overflow_area (address)
+  //   i64   reg_save_area (address)
+  // }
+  // sizeof(va_list) = 24
+  // alignment(va_list) = 8
+
+  unsigned TotalNumIntRegs = 6;
+  unsigned TotalNumXMMRegs = 8;
+  bool UseGPOffset = (ArgMode == 1);
+  bool UseFPOffset = (ArgMode == 2);
+  unsigned MaxOffset = TotalNumIntRegs * 8 +
+                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
+
+  /* Align ArgSize to a multiple of 8 */
+  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
+  bool NeedsAlign = (Align > 8);
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *overflowMBB;
+  MachineBasicBlock *offsetMBB;
+  MachineBasicBlock *endMBB;
+
+  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
+  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
+  unsigned OffsetReg = 0;
+
+  if (!UseGPOffset && !UseFPOffset) {
+    // If we only pull from the overflow region, we don't create a branch.
+    // We don't need to alter control flow.
+    OffsetDestReg = 0; // unused
+    OverflowDestReg = DestReg;
+
+    offsetMBB = NULL;
+    overflowMBB = thisMBB;
+    endMBB = thisMBB;
+  } else {
+    // First emit code to check if gp_offset (or fp_offset) is below the bound.
+    // If so, pull the argument from reg_save_area. (branch to offsetMBB)
+    // If not, pull from overflow_area. (branch to overflowMBB)
+    //
+    //       thisMBB
+    //         |     .
+    //         |        .
+    //     offsetMBB   overflowMBB
+    //         |        .
+    //         |     .
+    //        endMBB
+
+    // Registers for the PHI in endMBB
+    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+    MachineFunction *MF = MBB->getParent();
+    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+    MachineFunction::iterator MBBIter = MBB;
+    ++MBBIter;
+
+    // Insert the new basic blocks
+    MF->insert(MBBIter, offsetMBB);
+    MF->insert(MBBIter, overflowMBB);
+    MF->insert(MBBIter, endMBB);
+
+    // Transfer the remainder of MBB and its successor edges to endMBB.
+    endMBB->splice(endMBB->begin(), thisMBB,
+                   llvm::next(MachineBasicBlock::iterator(MI)),
+                   thisMBB->end());
+    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+    // Make offsetMBB and overflowMBB successors of thisMBB
+    thisMBB->addSuccessor(offsetMBB);
+    thisMBB->addSuccessor(overflowMBB);
+
+    // endMBB is a successor of both offsetMBB and overflowMBB
+    offsetMBB->addSuccessor(endMBB);
+    overflowMBB->addSuccessor(endMBB);
+
+    // Load the offset value into a register
+    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Check if there is enough room left to pull this argument.
+    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+      .addReg(OffsetReg)
+      .addImm(MaxOffset + 8 - ArgSizeA8);
+
+    // Branch to "overflowMBB" if offset >= max
+    // Fall through to "offsetMBB" otherwise
+    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+      .addMBB(overflowMBB);
+  }
+
+  // In offsetMBB, emit code to use the reg_save_area.
+  if (offsetMBB) {
+    assert(OffsetReg != 0);
+
+    // Read the reg_save_area address.
+    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, 16)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Zero-extend the offset
+    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addImm(X86::sub_32bit);
+
+    // Add the offset to the reg_save_area to get the final address.
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+      .addReg(OffsetReg64)
+      .addReg(RegSaveReg);
+
+    // Compute the offset for the next argument
+    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+      .addReg(OffsetReg)
+      .addImm(UseFPOffset ? 16 : 8);
+
+    // Store it back into the va_list.
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .addReg(NextOffsetReg)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Jump to endMBB
+    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+      .addMBB(endMBB);
+  }
+
+  //
+  // Emit code to use overflow area
+  //
+
+  // Load the overflow_area address into a register.
+  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we need to align it, do so. Otherwise, just copy the address
+  // to OverflowDestReg.
+  if (NeedsAlign) {
+    // Align the overflow address
+    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // aligned_addr = (addr + (align-1)) & ~(align-1)
+    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+      .addReg(OverflowAddrReg)
+      .addImm(Align-1);
+
+    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+      .addReg(TmpReg)
+      .addImm(~(uint64_t)(Align-1));
+  } else {
+    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+      .addReg(OverflowAddrReg);
+  }
+
+  // Compute the next overflow address after this argument.
+  // (the overflow address should be kept 8-byte aligned)
+  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+    .addReg(OverflowDestReg)
+    .addImm(ArgSizeA8);
+
+  // Store the new overflow address.
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .addReg(NextAddrReg)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we branched, emit the PHI to the front of endMBB.
+  if (offsetMBB) {
+    BuildMI(*endMBB, endMBB->begin(), DL,
+            TII->get(X86::PHI), DestReg)
+      .addReg(OffsetDestReg).addMBB(offsetMBB)
+      .addReg(OverflowDestReg).addMBB(overflowMBB);
+  }
+
+  // Erase the pseudo instruction
+  MI->eraseFromParent();
+
+  return endMBB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                  MachineInstr *MI,
                                                  MachineBasicBlock *MBB) const {
@@ -9915,6 +10228,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                false);
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+  case X86::VAARG_64:
+    return EmitVAARG64WithCustomInserter(MI, BB);
   }
 }
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1a2da74..34c72c9 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -310,7 +310,11 @@ namespace llvm {
       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
-      FST
+      FST,
+
+      /// VAARG_64 - This instruction grabs the address of the next argument
+      /// from a va_list. (reads and modifies the va_list in memory)
+      VAARG_64
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -853,6 +857,11 @@ namespace llvm {
                                                    MachineBasicBlock *BB,
                                                    unsigned cmovOpc) const;
 
+    // Utility function to emit the low-level va_arg code for X86-64.
+    MachineBasicBlock *EmitVAARG64WithCustomInserter(
+                       MachineInstr *MI,
+                       MachineBasicBlock *MBB) const;
+
     /// Utility function to emit the xmm reg save portion of va_start.
     MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                    MachineInstr *BInstr,
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 4fdbdc5..6bfd605 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -78,6 +78,17 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
                                  imm:$regsavefi, imm:$offset)]>;
 
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+                 (outs GR64:$dst),
+                 (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+                 "#VAARG_64 $dst, $ap, $size, $mode, $align",
+                 [(set GR64:$dst,
+                    (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+                  (implicit EFLAGS)]>;
+
 // Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls
 // to _alloca is needed to probe the stack when allocating more than 4k bytes in
 // one go. Touching the stack at 4K increments is necessary to ensure that the
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0708b67..ed5e407 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -64,6 +64,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
                                                          SDTCisVT<1, iPTR>,
                                                          SDTCisVT<2, iPTR>]>;
 
+def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
+                                            SDTCisPtrTy<1>,
+                                            SDTCisVT<2, i32>,
+                                            SDTCisVT<3, i8>,
+                                            SDTCisVT<4, i32>]>;
+
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
 def SDTX86Void    : SDTypeProfile<0, 0, []>;
@@ -141,7 +147,10 @@ def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
                                       SDT_X86VASTART_SAVE_XMM_REGS,
                                       [SDNPHasChain, SDNPVariadic]>;
-
+def X86vaarg64 :
+                 SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+                         SDNPMemOperand]>;
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
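
The overflow path emitted by EmitVAARG64WithCustomInserter above can be read as the following C++ sketch. It is illustrative only, not code from the patch, and the helper name and signature are invented for the example: align the overflow pointer when the type's alignment exceeds 8 bytes, then advance it by the argument size rounded up to a multiple of 8, matching the ADD64ri32/AND64ri32 sequence in the diff.

#include <cstdint>

// Hypothetical illustration -- not from this commit.
void *nextOverflowArgAddress(void *&overflow_area, uint64_t ArgSize,
                             uint64_t Align) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(overflow_area);
  if (Align > 8)                                      // NeedsAlign in the patch
    addr = (addr + (Align - 1)) & ~(Align - 1);       // aligned_addr = (addr + (align-1)) & ~(align-1)
  uint64_t ArgSizeA8 = (ArgSize + 7) & ~uint64_t(7);  // keep the overflow area 8-byte aligned
  overflow_area = reinterpret_cast<void *>(addr + ArgSizeA8);
  return reinterpret_cast<void *>(addr);              // address of the current argument
}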