diff options
Diffstat (limited to 'lib/Target/Sparc')
30 files changed, 2261 insertions, 706 deletions
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index efb10db..acf7496 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -10,7 +10,6 @@ add_public_tablegen_target(SparcCommonTableGen) add_llvm_target(SparcCodeGen DelaySlotFiller.cpp - FPMover.cpp SparcAsmPrinter.cpp SparcInstrInfo.cpp SparcISelDAGToDAG.cpp @@ -23,7 +22,7 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) -add_dependencies(LLVMSparcCodeGen intrinsics_gen) +add_dependencies(LLVMSparcCodeGen SparcCommonTableGen intrinsics_gen) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 6123773..b101751 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -39,11 +39,10 @@ namespace { /// layout, etc. /// TargetMachine &TM; - const TargetInstrInfo *TII; static char ID; - Filler(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } + Filler(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "SPARC Delay Slot Filler"; @@ -61,8 +60,9 @@ namespace { bool isDelayFiller(MachineBasicBlock &MBB, MachineBasicBlock::iterator candidate); - void insertCallUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegUses); + void insertCallDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); void insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegDefs, @@ -81,6 +81,9 @@ namespace { bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); + bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + }; char Filler::ID = 0; } // end of anonymous namespace @@ -99,29 +102,45 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { bool 
Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->hasDelaySlot()) { - MachineBasicBlock::iterator D = MBB.end(); - MachineBasicBlock::iterator J = I; - - if (!DisableDelaySlotFiller) - D = findDelayInstr(MBB, I); - - ++FilledSlots; - Changed = true; - - if (D == MBB.end()) - BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); - else - MBB.splice(++J, &MBB, D); - unsigned structSize = 0; - if (needsUnimp(I, structSize)) { - MachineBasicBlock::iterator J = I; - ++J; //skip the delay filler. - BuildMI(MBB, ++J, I->getDebugLoc(), - TII->get(SP::UNIMP)).addImm(structSize); - } + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + MachineBasicBlock::iterator MI = I; + ++I; + + // If MI is restore, try combining it with previous inst. + if (!DisableDelaySlotFiller && + (MI->getOpcode() == SP::RESTORErr + || MI->getOpcode() == SP::RESTOREri)) { + Changed |= tryCombineRestoreWithPrevInst(MBB, MI); + continue; + } + + // If MI has no delay slot, skip. + if (!MI->hasDelaySlot()) + continue; + + MachineBasicBlock::iterator D = MBB.end(); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB, MI); + + ++FilledSlots; + Changed = true; + + const TargetInstrInfo *TII = TM.getInstrInfo(); + if (D == MBB.end()) + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP)); + else + MBB.splice(I, &MBB, D); + + unsigned structSize = 0; + if (needsUnimp(MI, structSize)) { + MachineBasicBlock::iterator J = MI; + ++J; // skip the delay filler. 
+ assert (J != MBB.end() && "MI needs a delay instruction."); + BuildMI(MBB, ++J, MI->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); } + } return Changed; } @@ -134,28 +153,34 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, bool sawLoad = false; bool sawStore = false; - MachineBasicBlock::iterator I = slot; + if (slot == MBB.begin()) + return MBB.end(); if (slot->getOpcode() == SP::RET) return MBB.end(); if (slot->getOpcode() == SP::RETL) { - --I; - if (I->getOpcode() != SP::RESTORErr) - return MBB.end(); - //change retl to ret - slot->setDesc(TII->get(SP::RET)); - return I; + MachineBasicBlock::iterator J = slot; + --J; + + if (J->getOpcode() == SP::RESTORErr + || J->getOpcode() == SP::RESTOREri) { + // change retl to ret. + slot->setDesc(TM.getInstrInfo()->get(SP::RET)); + return J; + } } - //Call's delay filler can def some of call's uses. + // Call's delay filler can def some of call's uses. if (slot->isCall()) - insertCallUses(slot, RegUses); + insertCallDefsUses(slot, RegDefs, RegUses); else insertDefsUses(slot, RegDefs, RegUses); bool done = false; + MachineBasicBlock::iterator I = slot; + while (!done) { done = (I == MBB.begin()); @@ -216,12 +241,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, unsigned Reg = MO.getReg(); if (MO.isDef()) { - //check whether Reg is defined or used before delay slot. + // check whether Reg is defined or used before delay slot. if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) return true; } if (MO.isUse()) { - //check whether Reg is defined before delay slot. + // check whether Reg is defined before delay slot. 
if (IsRegInSet(RegDefs, Reg)) return true; } @@ -230,9 +255,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, } -void Filler::insertCallUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegUses) +void Filler::insertCallDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses) { + // Call defines o7, which is visible to the instruction in delay slot. + RegDefs.insert(SP::O7); switch(MI->getOpcode()) { default: llvm_unreachable("Unknown opcode."); @@ -255,7 +283,7 @@ void Filler::insertCallUses(MachineBasicBlock::iterator MI, } } -//Insert Defs and Uses of MI into the sets RegDefs and RegUses. +// Insert Defs and Uses of MI into the sets RegDefs and RegUses. void Filler::insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegDefs, SmallSet<unsigned, 32>& RegUses) @@ -270,13 +298,17 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, continue; if (MO.isDef()) RegDefs.insert(Reg); - if (MO.isUse()) + if (MO.isUse()) { + // Implicit register uses of retl are return values and + // retl does not use them. + if (MO.isImplicit() && MI->getOpcode() == SP::RETL) + continue; RegUses.insert(Reg); - + } } } -//returns true if the Reg or its alias is in the RegSet. +// returns true if the Reg or its alias is in the RegSet. bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { // Check Reg and all aliased Registers. @@ -318,3 +350,142 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) StructSize = MO.getImm(); return true; } + +static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator AddMI, + const TargetInstrInfo *TII) +{ + // Before: add <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // + // After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = AddMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + // Erase RESTORE. 
+ RestoreMI->eraseFromParent(); + + // Change ADD to RESTORE. + AddMI->setDesc(TII->get((AddMI->getOpcode() == SP::ADDrr) + ? SP::RESTORErr + : SP::RESTOREri)); + + // Map the destination register. + AddMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator OrMI, + const TargetInstrInfo *TII) +{ + // Before: or <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // and <op0> or <op1> is zero, + // + // After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = OrMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + // check whether it is a copy. + if (OrMI->getOpcode() == SP::ORrr + && OrMI->getOperand(1).getReg() != SP::G0 + && OrMI->getOperand(2).getReg() != SP::G0) + return false; + + if (OrMI->getOpcode() == SP::ORri + && OrMI->getOperand(1).getReg() != SP::G0 + && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0)) + return false; + + // Erase RESTORE. + RestoreMI->eraseFromParent(); + + // Change OR to RESTORE. + OrMI->setDesc(TII->get((OrMI->getOpcode() == SP::ORrr) + ? SP::RESTORErr + : SP::RESTOREri)); + + // Map the destination register. + OrMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator SetHiMI, + const TargetInstrInfo *TII) +{ + // Before: sethi imm3, %i[0-7] + // restore %g0, %g0, %g0 + // + // After : restore %g0, (imm3<<10), %o[0-7] + + unsigned reg = SetHiMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + if (!SetHiMI->getOperand(1).isImm()) + return false; + + int64_t imm = SetHiMI->getOperand(1).getImm(); + + // Is it a 3 bit immediate? + if (!isInt<3>(imm)) + return false; + + // Make it a 13 bit immediate. 
+ imm = (imm << 10) & 0x1FFF; + + assert(RestoreMI->getOpcode() == SP::RESTORErr); + + RestoreMI->setDesc(TII->get(SP::RESTOREri)); + + RestoreMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + RestoreMI->getOperand(1).setReg(SP::G0); + RestoreMI->getOperand(2).ChangeToImmediate(imm); + + + // Erase the original SETHI. + SetHiMI->eraseFromParent(); + + return true; +} + +bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) +{ + // No previous instruction. + if (MBBI == MBB.begin()) + return false; + + // assert that MBBI is a "restore %g0, %g0, %g0". + assert(MBBI->getOpcode() == SP::RESTORErr + && MBBI->getOperand(0).getReg() == SP::G0 + && MBBI->getOperand(1).getReg() == SP::G0 + && MBBI->getOperand(2).getReg() == SP::G0); + + MachineBasicBlock::iterator PrevInst = MBBI; --PrevInst; + + // It cannot combine with a delay filler. + if (isDelayFiller(MBB, PrevInst)) + return false; + + const TargetInstrInfo *TII = TM.getInstrInfo(); + + switch (PrevInst->getOpcode()) { + default: break; + case SP::ADDrr: + case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break; + case SP::ORrr: + case SP::ORri: return combineRestoreOR(MBBI, PrevInst, TII); break; + case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break; + } + // It cannot combine with the previous instruction. + return false; +} diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp deleted file mode 100644 index 1325b98..0000000 --- a/lib/Target/Sparc/FPMover.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "fpmover" -#include "Sparc.h" -#include "SparcSubtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -STATISTIC(NumFpDs , "Number of instructions translated"); -STATISTIC(NoopFpDs, "Number of noop instructions removed"); - -namespace { - struct FPMover : public MachineFunctionPass { - /// Target machine description which we query for reg. names, data - /// layout, etc. - /// - TargetMachine &TM; - - static char ID; - explicit FPMover(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } - - virtual const char *getPassName() const { - return "Sparc Double-FP Move Fixer"; - } - - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - bool runOnMachineFunction(MachineFunction &F); - }; - char FPMover::ID = 0; -} // end of anonymous namespace - -/// createSparcFPMoverPass - Returns a pass that turns FpMOVD -/// instructions into FMOVS instructions -/// -FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) { - return new FPMover(tm); -} - -/// getDoubleRegPair - Given a DFP register, return the even and odd FP -/// registers that correspond to it. 
-static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, - unsigned &OddReg) { - static const uint16_t EvenHalvesOfPairs[] = { - SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14, - SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30 - }; - static const uint16_t OddHalvesOfPairs[] = { - SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15, - SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31 - }; - static const uint16_t DoubleRegsInOrder[] = { - SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8, - SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15 - }; - for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i) - if (DoubleRegsInOrder[i] == DoubleReg) { - EvenReg = EvenHalvesOfPairs[i]; - OddReg = OddHalvesOfPairs[i]; - return; - } - llvm_unreachable("Can't find reg"); -} - -/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB. -/// -bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - MachineInstr *MI = I++; - DebugLoc dl = MI->getDebugLoc(); - if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD || - MI->getOpcode() == SP::FpNEGD) { - Changed = true; - unsigned DestDReg = MI->getOperand(0).getReg(); - unsigned SrcDReg = MI->getOperand(1).getReg(); - if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) { - MBB.erase(MI); // Eliminate the noop copy. 
- ++NoopFpDs; - continue; - } - - unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0; - getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg); - getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg); - - const TargetInstrInfo *TII = TM.getInstrInfo(); - if (MI->getOpcode() == SP::FpMOVD) - MI->setDesc(TII->get(SP::FMOVS)); - else if (MI->getOpcode() == SP::FpNEGD) - MI->setDesc(TII->get(SP::FNEGS)); - else if (MI->getOpcode() == SP::FpABSD) - MI->setDesc(TII->get(SP::FABSS)); - else - llvm_unreachable("Unknown opcode!"); - - MI->getOperand(0).setReg(EvenDestReg); - MI->getOperand(1).setReg(EvenSrcReg); - DEBUG(errs() << "FPMover: the modified instr is: " << *MI); - // Insert copy for the other half of the double. - if (DestDReg != SrcDReg) { - MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg) - .addReg(OddSrcReg); - DEBUG(errs() << "FPMover: the inserted instr is: " << *MI); - } - ++NumFpDs; - } - } - return Changed; -} - -bool FPMover::runOnMachineFunction(MachineFunction &F) { - // If the target has V9 instructions, the fp-mover pseudos will never be - // emitted. Avoid a scan of the instructions to improve compile time. 
- if (TM.getSubtarget<SparcSubtarget>().isV9()) - return false; - - bool Changed = false; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - Changed |= runOnMachineBasicBlock(*FI); - return Changed; -} diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index fe20d2f..7d54d32 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -28,5 +28,6 @@ has_asmprinter = 1 type = Library name = SparcCodeGen parent = Sparc -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target +required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc + SparcInfo Support Target add_to_library_groups = Sparc diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h new file mode 100644 index 0000000..aac0e8d --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h @@ -0,0 +1,62 @@ +//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions +// for the Sparc target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core code gen +// types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCBASEINFO_H +#define SPARCBASEINFO_H + +namespace llvm { + +/// SPII - This namespace holds target specific flags for instruction info. +namespace SPII { + +/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and +/// SDNodes. +enum TOF { + MO_NO_FLAG, + + // Extract the low 10 bits of an address. 
+ // Assembler: %lo(addr) + MO_LO, + + // Extract bits 31-10 of an address. Only for sethi. + // Assembler: %hi(addr) or %lm(addr) + MO_HI, + + // Extract bits 43-22 of an address. Only for sethi. + // Assembler: %h44(addr) + MO_H44, + + // Extract bits 21-12 of an address. + // Assembler: %m44(addr) + MO_M44, + + // Extract bits 11-0 of an address. + // Assembler: %l44(addr) + MO_L44, + + // Extract bits 63-42 of an address. Only for sethi. + // Assembler: %hh(addr) + MO_HH, + + // Extract bits 41-32 of an address. + // Assembler: %hm(addr) + MO_HM +}; + +} // end namespace SPII +} // end namespace llvm + +#endif diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 3d4bfdc..5a52abe 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void SparcELFMCAsmInfo::anchor() { } -SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { +SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); if (TheTriple.getArch() == Triple::sparcv9) { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index f0e1354..621e8ff 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -18,12 +18,11 @@ namespace llvm { class StringRef; - class Target; class SparcELFMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit SparcELFMCAsmInfo(const Target &T, StringRef TT); + explicit SparcELFMCAsmInfo(StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 7fdb0c3..1c64e1b 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -50,14 +50,42 @@ static MCSubtargetInfo 
*createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } +// Code models. Some only make sense for 64-bit code. +// +// SunCC Reloc CodeModel Constraints +// abs32 Static Small text+data+bss linked below 2^32 bytes +// abs44 Static Medium text+data+bss linked below 2^44 bytes +// abs64 Static Large text smaller than 2^31 bytes +// pic13 PIC_ Small GOT < 2^13 bytes +// pic32 PIC_ Medium GOT < 2^32 bytes +// +// All code models require that the text segment is smaller than 2GB. + static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 32-bit code model is abs32/pic32. + if (CM == CodeModel::Default) + CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small; + X->InitMCCodeGenInfo(RM, CM, OL); return X; } +static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + + // The default 64-bit code model is abs44/pic32. + if (CM == CodeModel::Default) + CM = CodeModel::Medium; + + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} extern "C" void LLVMInitializeSparcTargetMC() { // Register the MC asm info. RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget); @@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() { TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget, createSparcMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target, - createSparcMCCodeGenInfo); + createSparcV9MCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo); diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt index b4991fe..34e68cf 100644 --- a/lib/Target/Sparc/README.txt +++ b/lib/Target/Sparc/README.txt @@ -38,7 +38,7 @@ t1: 1) should be replaced with a brz in V9 mode. 
-* Same as above, but emit conditional move on register zero (p192) in V9 +* Same as above, but emit conditional move on register zero (p192) in V9 mode. Testcase: int %t1(int %a, int %b) { @@ -47,13 +47,15 @@ int %t1(int %a, int %b) { ret int %D } -* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling +* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling with the Y register, if they are faster. * Codegen bswap(load)/store(bswap) -> load/store ASI -* Implement frame pointer elimination, e.g. eliminate save/restore for +* Implement frame pointer elimination, e.g. eliminate save/restore for leaf fns. * Fill delay slots * Implement JIT support + +* Use %g0 directly to materialize 0. No instruction is required. diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index ce6ae17..98563db 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); - FunctionPass *createSparcFPMoverPass(TargetMachine &TM); } // end namespace llvm; @@ -51,7 +50,7 @@ namespace llvm { ICC_NEG = 6 , // Negative ICC_VC = 15 , // Overflow Clear ICC_VS = 7 , // Overflow Set - + //FCC_A = 8+16, // Always //FCC_N = 0+16, // Never FCC_U = 7+16, // Unordered @@ -70,7 +69,7 @@ namespace llvm { FCC_O = 15+16 // Ordered }; } - + inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { switch (CC) { case SPCC::ICC_NE: return "ne"; diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 611f8e8..d42c40f 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -19,7 +19,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // SPARC Subtarget features. 
// - + def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">; diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index e14b3cb..3fe2b44 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -16,6 +16,7 @@ #include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" @@ -59,11 +60,9 @@ namespace { raw_ostream &O); bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS); - + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - - virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -72,15 +71,39 @@ namespace { void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand (opNum); - bool CloseParen = false; - if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { - O << "%hi("; - CloseParen = true; - } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && - !MO.isReg() && !MO.isImm()) { - O << "%lo("; - CloseParen = true; + unsigned TF = MO.getTargetFlags(); +#ifndef NDEBUG + // Verify the target flags. 
+ if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { + if (MI->getOpcode() == SP::CALL) + assert(TF == SPII::MO_NO_FLAG && + "Cannot handle target flags on call address"); + else if (MI->getOpcode() == SP::SETHIi) + assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) && + "Invalid target flags for address operand on sethi"); + else + assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 || + TF == SPII::MO_HM) && + "Invalid target flags for small address operand"); } +#endif + + bool CloseParen = true; + switch (TF) { + default: + llvm_unreachable("Unknown target flags on operand"); + case SPII::MO_NO_FLAG: + CloseParen = false; + break; + case SPII::MO_LO: O << "%lo("; break; + case SPII::MO_HI: O << "%hi("; break; + case SPII::MO_H44: O << "%h44("; break; + case SPII::MO_M44: O << "%m44("; break; + case SPII::MO_L44: O << "%l44("; break; + case SPII::MO_HH: O << "%hh("; break; + case SPII::MO_HM: O << "%hm("; break; + } + switch (MO.getType()) { case MachineOperand::MO_Register: O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); @@ -95,6 +118,9 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MachineOperand::MO_GlobalAddress: O << *Mang->getSymbol(MO.getGlobal()); break; + case MachineOperand::MO_BlockAddress: + O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; case MachineOperand::MO_ExternalSymbol: O << MO.getSymbolName(); break; @@ -127,14 +153,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, return; // don't print "+0" O << "+"; - if (MI->getOperand(opNum+1).isGlobal() || - MI->getOperand(opNum+1).isCPI()) { - O << "%lo("; - printOperand(MI, opNum+1, O); - O << ")"; - } else { - printOperand(MI, opNum+1, O); - } + printOperand(MI, opNum+1, O); } bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, @@ -146,7 +165,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, case 
MachineOperand::MO_Register: assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Operand is not a physical register "); - assert(MO.getReg() != SP::O7 && + assert(MO.getReg() != SP::O7 && "%o7 is assigned as destination for getpcx!"); operand = "%" + StringRef(getRegisterName(MO.getReg())).lower(); break; @@ -159,15 +178,15 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n' ; O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; - O << "\tor\t" << operand + O << "\tor\t" << operand << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n'; - O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; - + O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; + return true; } @@ -225,19 +244,19 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) return false; - + // If there isn't exactly one predecessor, it can't be a fall through. MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; - + // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *PI; - + if (!Pred->isLayoutSuccessor(MBB)) return false; - + // Check if the last terminator is an unconditional branch. 
MachineBasicBlock::const_iterator I = Pred->end(); while (I != Pred->begin() && !(--I)->isTerminator()) @@ -245,17 +264,8 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return I == Pred->end() || !I->isBarrier(); } -MachineLocation SparcAsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - assert(MI->getNumOperands() == 4 && "Invalid number of operands!"); - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && - "Unexpected MachineOperand types"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - // Force static initialization. -extern "C" void LLVMInitializeSparcAsmPrinter() { +extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); } diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index d471220..a181bcf 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -12,19 +12,11 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Return Value Calling Conventions +// SPARC v8 32-bit. //===----------------------------------------------------------------------===// -// Sparc 32-bit C return-value convention. -def RetCC_Sparc32 : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> -]>; - -// Sparc 32-bit C Calling convention. def CC_Sparc32 : CallingConv<[ - //Custom assign SRet to [sp+64]. + // Custom assign SRet to [sp+64]. CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>, // i32 f32 arguments get passed in integer registers if there is space. 
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, @@ -34,3 +26,94 @@ def CC_Sparc32 : CallingConv<[ // Alternatively, they are assigned to the stack in 4-byte aligned units. CCAssignToStack<4, 4> ]>; + +def RetCC_Sparc32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1]>> +]>; + + +//===----------------------------------------------------------------------===// +// SPARC v9 64-bit. +//===----------------------------------------------------------------------===// +// +// The 64-bit ABI conceptually assigns all function arguments to a parameter +// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments +// occupy a multiple of 8 bytes in the array. Integer arguments are extended to +// 64 bits by the caller. Floats are right-aligned in their 8-byte slot, the +// first 4 bytes in the slot are undefined. +// +// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter +// array at fixed offsets. Integer arguments are promoted to registers when +// possible. +// +// The floating point registers %f0 to %f31 shadow the first 128 bytes of the +// parameter array at fixed offsets. Float and double parameters are promoted +// to these registers when possible. +// +// Structs up to 16 bytes in size are passed by value. They are right-aligned +// in one or two 8-byte slots in the parameter array. Struct members are +// promoted to both floating point and integer registers when possible. A +// struct containing two floats would thus be passed in %f0 and %f1, while two +// float function arguments would occupy 8 bytes each, and be passed in %f1 and +// %f3. +// +// When a struct { int, float } is passed by value, the int goes in the high +// bits of an integer register while the float goes in a floating point +// register. 
+// +// The difference is encoded in LLVM IR using the inreg attribute on function +// arguments: +// +// C: void f(float, float); +// IR: declare void f(float %f1, float %f3) +// +// C: void f(struct { float f0, f1; }); +// IR: declare void f(float inreg %f0, float inreg %f1) +// +// C: void f(int, float); +// IR: declare void f(i32 signext %i0, float %f3) +// +// C: void f(struct { int i0high; float f1; }); +// IR: declare void f(i32 inreg %i0high, float inreg %f1) +// +// Two ints in a struct are simply coerced to i64: +// +// C: void f(struct { int i0high, i0low; }); +// IR: declare void f(i64 %i0.coerced) +// +// The frontend and backend divide the task of producing ABI compliant code for +// C functions. The C frontend will: +// +// - Annotate integer arguments with zeroext or signext attributes. +// +// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with +// inreg attributes. +// +// - Pass structs larger than 16 bytes indirectly with an explicit pointer +// argument. The byval attribute is not used. +// +// The backend will: +// +// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg. +// +// - Promote to integer or floating point registers depending on type. +// +// Function return values are passed exactly like function arguments, except a +// struct up to 32 bytes in size can be returned in registers. + +// Function arguments AND return values. +def CC_Sparc64 : CallingConv<[ + // The frontend uses the inreg flag to indicate i32 and float arguments from + // structs. These arguments are not promoted to 64 bits, but they can still + // be assigned to integer and float registers. + CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>, + + // All integers are promoted to i64 by the caller. + CCIfType<[i32], CCPromoteToType<i64>>, + + // Custom assignment is required because stack space is reserved for all + // arguments whether they are passed in registers or not. 
+ CCCustom<"CC_Sparc64_Full"> +]>; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index a0dae6e..536e466 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -26,7 +26,16 @@ using namespace llvm; +static cl::opt<bool> +DisableLeafProc("disable-sparc-leaf-proc", + cl::init(false), + cl::desc("Disable Sparc leaf procedure optimization."), + cl::Hidden); + + void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); const SparcInstrInfo &TII = @@ -37,22 +46,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); - // Emit the correct save instruction based on the number of bytes in - // the frame. Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; - - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = (NumBytes + 7) & ~7; - NumBytes = -NumBytes; + unsigned SAVEri = SP::SAVEri; + unsigned SAVErr = SP::SAVErr; + if (FuncInfo->isLeafProc()) { + if (NumBytes == 0) + return; + SAVEri = SP::ADDri; + SAVErr = SP::ADDrr; + } + NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes); if (NumBytes >= -4096) { - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6) .addReg(SP::O6).addImm(NumBytes); } else { // Emit this the hard way. 
This clobbers G1 which we always know is @@ -62,7 +67,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Emit G1 = G1 + I6 BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6) .addReg(SP::O6).addReg(SP::G1); } } @@ -70,27 +75,144 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { void SparcFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - MachineInstr &MI = *I; - DebugLoc dl = MI.getDebugLoc(); - int Size = MI.getOperand(0).getImm(); - if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) - Size = -Size; - const SparcInstrInfo &TII = - *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); - if (Size) - BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size); + if (!hasReservedCallFrame(MF)) { + MachineInstr &MI = *I; + DebugLoc DL = MI.getDebugLoc(); + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == SP::ADJCALLSTACKDOWN) + Size = -Size; + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + if (Size) + BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6) + .addImm(Size); + } MBB.erase(I); } void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); const SparcInstrInfo &TII = *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); assert(MBBI->getOpcode() == SP::RETL && "Can only put epilog before 'retl' instruction!"); - BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) - .addReg(SP::G0); + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, dl, 
TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) + .addReg(SP::G0); + return; + } + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int NumBytes = (int) MFI->getStackSize(); + if (NumBytes == 0) + return; + + NumBytes = SubTarget.getAdjustedFrameSize(NumBytes); + + if (NumBytes < 4096) { + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6) + .addReg(SP::O6).addImm(NumBytes); + } else { + // Emit this the hard way. This clobbers G1 which we always know is + // available here. + unsigned OffHi = (unsigned)NumBytes >> 10U; + BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) + .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6) + .addReg(SP::O6).addReg(SP::G1); + } +} + +bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // Reserve call frame if there are no variable sized objects on the stack. + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. 
+bool SparcFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); +} + + +static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) +{ + + for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + return true; +} + +bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const +{ + + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + return !(MFI->hasCalls() // has calls + || MRI.isPhysRegUsed(SP::L0) // Too many registers needed + || MRI.isPhysRegUsed(SP::O6) // %SP is used + || hasFP(MF)); // need %FP +} + +void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Remap %i[0-7] to %o[0-7]. + for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { + if (!MRI.isPhysRegUsed(reg)) + continue; + unsigned mapped_reg = (reg - SP::I0 + SP::O0); + assert(!MRI.isPhysRegUsed(mapped_reg)); + + // Replace I register with O register. + MRI.replaceRegWith(reg, mapped_reg); + + // Mark the reg unused. + MRI.setPhysRegUnused(reg); + } + + // Rewrite MBB's Live-ins. 
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { + if (!MBB->isLiveIn(reg)) + continue; + MBB->removeLiveIn(reg); + MBB->addLiveIn(reg - SP::I0 + SP::O0); + } + } + + assert(verifyLeafProcRegUse(&MRI)); +#ifdef XDEBUG + MF.verify(0, "After LeafProc Remapping"); +#endif +} + +void SparcFrameLowering::processFunctionBeforeCalleeSavedScan + (MachineFunction &MF, RegScavenger *RS) const { + + if (!DisableLeafProc && isLeafProc(MF)) { + SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>(); + MFI->setLeafProc(true); + + remapRegsForLeafProc(MF); + } + } diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 464233e..8eaef59 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -22,10 +22,12 @@ namespace llvm { class SparcSubtarget; class SparcFrameLowering : public TargetFrameLowering { + const SparcSubtarget &SubTarget; public: - explicit SparcFrameLowering(const SparcSubtarget &/*sti*/) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) { - } + explicit SparcFrameLowering(const SparcSubtarget &ST) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8), + SubTarget(ST) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -36,7 +38,17 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - bool hasFP(const MachineFunction &MF) const { return false; } + bool hasReservedCallFrame(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + +private: + // Remap input registers to output registers for leaf procedure. + void remapRegsForLeafProc(MachineFunction &MF) const; + + // Returns true if MF is a leaf procedure. 
+ bool isLeafProc(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 5fa545d..db62151 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -33,7 +33,7 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; - SparcTargetMachine& TM; + SparcTargetMachine &TM; public: explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm), @@ -67,13 +67,15 @@ private: SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + getTargetLowering()->getPointerTy()).getNode(); } bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + getTargetLowering()->getPointerTy()); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -87,7 +89,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. 
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + getTargetLowering()->getPointerTy()); } else { Base = Addr.getOperand(0); } @@ -130,12 +133,12 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, MVT::i32); + R2 = CurDAG->getRegister(SP::G0, getTargetLowering()->getPointerTy()); return true; } SDNode *SparcDAGToDAGISel::Select(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isMachineOpcode()) return NULL; // Already selected. @@ -146,6 +149,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { case ISD::SDIV: case ISD::UDIV: { + // sdivx / udivx handle 64-bit divides. + if (N->getValueType(0) == MVT::i64) + break; // FIXME: should use a custom expander to expose the SRA to the dag. SDValue DivLHS = N->getOperand(0); SDValue DivRHS = N->getOperand(1); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 28ac02a..4b0fa67 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -15,6 +15,7 @@ #include "SparcISelLowering.h" #include "SparcMachineFunctionInfo.h" #include "SparcTargetMachine.h" +#include "MCTargetDesc/SparcBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -39,7 +40,7 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT, { assert (ArgFlags.isSRet()); - //Assign SRet argument + // Assign SRet argument. State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 0, LocVT, LocInfo)); @@ -53,18 +54,18 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, static const uint16_t RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; - //Try to get first reg + // Try to get first reg. 
if (unsigned Reg = State.AllocateReg(RegList, 6)) { State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { - //Assign whole thing in stack + // Assign whole thing in stack. State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, State.AllocateStack(8,4), LocVT, LocInfo)); return true; } - //Try to get second reg + // Try to get second reg. if (unsigned Reg = State.AllocateReg(RegList, 6)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else @@ -74,25 +75,117 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, return true; } +// Allocate a full-sized argument for the 64-bit ABI. +static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) && + "Can't handle non-64 bits locations"); + + // Stack space is allocated for all arguments starting from [%fp+BIAS+128]. + unsigned Offset = State.AllocateStack(8, 8); + unsigned Reg = 0; + + if (LocVT == MVT::i64 && Offset < 6*8) + // Promote integers to %i0-%i5. + Reg = SP::I0 + Offset/8; + else if (LocVT == MVT::f64 && Offset < 16*8) + // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15). + Reg = SP::D0 + Offset/8; + else if (LocVT == MVT::f32 && Offset < 16*8) + // Promote floats to %f1, %f3, ... + Reg = SP::F1 + Offset/4; + + // Promote to register when possible, otherwise use the stack slot. + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + // This argument goes on the stack in an 8-byte slot. + // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to + // the right-aligned float. The first 4 bytes of the stack slot are undefined. + if (LocVT == MVT::f32) + Offset += 4; + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + +// Allocate a half-sized argument for the 64-bit ABI. 
+// +// This is used when passing { float, int } structs by value in registers. +static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations"); + unsigned Offset = State.AllocateStack(4, 4); + + if (LocVT == MVT::f32 && Offset < 16*8) { + // Promote floats to %f0-%f31. + State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4, + LocVT, LocInfo)); + return true; + } + + if (LocVT == MVT::i32 && Offset < 6*8) { + // Promote integers to %i0-%i5, using half the register. + unsigned Reg = SP::I0 + Offset/8; + LocVT = MVT::i64; + LocInfo = CCValAssign::AExt; + + // Set the Custom bit if this i32 goes in the high bits of a register. + if (Offset % 8 == 0) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, + LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; +} + #include "SparcGenCallingConv.inc" +// The calling conventions in SparcCallingConv.td are described in terms of the +// callee's register window. This function translates registers to the +// corresponding caller window %o register. 
+static unsigned toCallerWindow(unsigned Reg) { + assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum"); + if (Reg >= SP::I0 && Reg <= SP::I7) + return Reg - SP::I0 + SP::O0; + return Reg; +} + SDValue SparcTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, + CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc DL, SelectionDAG &DAG) const { + if (Subtarget->is64Bit()) + return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); + return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); +} +SDValue +SparcTargetLowering::LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), DAG.getTarget(), RVLocs, *DAG.getContext()); - // Analize return values. + // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); SDValue Flag; @@ -105,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. 
@@ -113,15 +206,15 @@ SparcTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - unsigned RetAddrOffset = 8; //Call Inst + Delay Slot + unsigned RetAddrOffset = 8; // Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 if (MF.getFunction()->hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp @@ -134,22 +227,114 @@ SparcTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, &RetOps[0], RetOps.size()); } -/// LowerFormalArguments - V8 uses a very simple ABI, where all values are -/// passed in either one or two GPRs, including FP values. TODO: we should -/// pass FP values in FP registers for fastcc functions. +// Lower return values for the 64-bit ABI. +// Return values are passed exactly the same way as function arguments. 
SDValue -SparcTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) - const { +SparcTargetLowering::LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, CC_Sparc64); + + SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); + + // The second operand on the return instruction is the return address offset. + // The return address is always %i7+8 with the 64-bit ABI. + RetOps.push_back(DAG.getConstant(8, MVT::i32)); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + default: + break; + } + + // The custom bit on an i32 return value indicates that it should be passed + // in the high bits of the register. 
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]); + OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV); + // Skip the next value, it's already done. + ++i; + } + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, + &RetOps[0], RetOps.size()); +} + +SDValue SparcTargetLowering:: +LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc DL, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + if (Subtarget->is64Bit()) + return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins, + DL, DAG, InVals); + return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins, + DL, DAG, InVals); +} + +/// LowerFormalArguments_32 - V8 uses a very simple ABI, where all values are +/// passed in either one or two GPRs, including FP values. TODO: we should +/// pass FP values in FP registers for fastcc functions. 
+SDValue SparcTargetLowering:: +LowerFormalArguments_32(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); @@ -166,7 +351,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, CCValAssign &VA = ArgLocs[i]; if (i == 0 && Ins[i].Flags.isSRet()) { - //Get SRet from [%fp+64] + // Get SRet from [%fp+64]. int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, @@ -225,7 +410,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.needsCustom()) { assert(VA.getValVT() == MVT::f64); - //If it is double-word aligned, just load. + // If it is double-word aligned, just load. if (Offset % 8 == 0) { int FI = MF.getFrameInfo()->CreateFixedObject(8, Offset, @@ -285,7 +470,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, } if (MF.getFunction()->hasStructRetAttr()) { - //Copy the SRet Argument to SRetReturnReg + // Copy the SRet Argument to SRetReturnReg. SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) { @@ -341,14 +526,137 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, return Chain; } +// Lower formal arguments for the 64 bit ABI. +SDValue SparcTargetLowering:: +LowerFormalArguments_64(SDValue Chain, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc DL, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Analyze arguments according to CC_Sparc64. 
+ SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64); + + // The argument array begins at %fp+BIAS+128, after the register save area. + const unsigned ArgArea = 128; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + // This argument is passed in a register. + // All integer register arguments are promoted by the caller to i64. + + // Create a virtual register for the promoted live-in value. + unsigned VReg = MF.addLiveIn(VA.getLocReg(), + getRegClassFor(VA.getLocVT())); + SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, MVT::i32)); + + // The caller promoted the argument, so insert an Assert?ext SDNode so we + // won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the argument type. + if (VA.isExtInLoc()) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); + + InVals.push_back(Arg); + continue; + } + + // The registers are exhausted. This argument was passed on the stack. + assert(VA.isMemLoc()); + // The CC_Sparc64_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+BIAS+128. + unsigned Offset = VA.getLocMemOffset() + ArgArea; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + // Adjust offset for extended arguments, SPARC is big-endian. 
+ // The caller will have written the full slot with extended bytes, but we + // prefer our own extending loads. + if (VA.isExtInLoc()) + Offset += 8 - ValSize; + int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true); + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0)); + } + + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %i0-%i5. Variable floating point arguments are never passed + // in floating point registers. They go on %i0-%i5 or on the stack like + // integer arguments. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + unsigned ArgOffset = CCInfo.getNextStackOffset(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // Skip the 128 bytes of register save area. + FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea + + Subtarget->getStackPointerBias()); + + // Save the variable arguments that were passed in registers. + // The caller is required to reserve stack space for 6 arguments regardless + // of how many arguments were actually passed. 
+ SmallVector<SDValue, 8> OutChains; + for (; ArgOffset < 6*8; ArgOffset += 8) { + unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass); + SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); + OutChains.push_back(DAG.getStore(Chain, DL, VArg, + DAG.getFrameIndex(FI, getPointerTy()), + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &OutChains[0], OutChains.size()); + + return Chain; +} + SDValue SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { + if (Subtarget->is64Bit()) + return LowerCall_64(CLI, InVals); + return LowerCall_32(CLI, InVals); +} + +// Lower a call for the 32-bit ABI. +SDValue +SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -372,7 +680,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - //Create local copies for byval args. + // Create local copies for byval args. 
SmallVector<SDValue, 8> ByValArgs; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -388,13 +696,14 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue SizeNode = DAG.getConstant(Size, MVT::i32); Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, - false, //isVolatile, - (Size <= 32), //AlwaysInline if size <= 32 + false, // isVolatile, + (Size <= 32), // AlwaysInline if size <= 32 MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true), + dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -410,7 +719,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - //Use local copy if it is a byval arg. + // Use local copy if it is a byval arg. if (Flags.isByVal()) Arg = ByValArgs[byvalArgIdx++]; @@ -450,7 +759,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; - //if it is double-word aligned, just store. + // if it is double-word aligned, just store. if (Offset % 8 == 0) { SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); @@ -483,7 +792,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NextVA.isRegLoc()) { RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); } else { - //Store the low part in stack. + // Store the low part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); @@ -546,11 +855,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - // Remap I0->I7 -> O0->O7. - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - + unsigned Reg = toCallerWindow(RegsToPass[i].first); Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -572,13 +877,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = RegsToPass[i].first; - if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); - } + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first), + RegsToPass[i].second.getValueType())); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -586,7 +887,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); // Assign locations to each value returned by this call. @@ -598,13 +899,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { - unsigned Reg = RVLocs[i].getLocReg(); - - // Remap I0->I7 -> O0->O7. 
- if (Reg >= SP::I0 && Reg <= SP::I7) - Reg = Reg-SP::I0+SP::O0; - - Chain = DAG.getCopyFromReg(Chain, dl, Reg, + Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); @@ -637,6 +932,260 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const return getDataLayout()->getTypeAllocSize(ElementTy); } + +// Fixup floating point arguments in the ... part of a varargs call. +// +// The SPARC v9 ABI requires that floating point arguments are treated the same +// as integers when calling a varargs function. This does not apply to the +// fixed arguments that are part of the function's prototype. +// +// This function post-processes a CCValAssign array created by +// AnalyzeCallOperands(). +static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs, + ArrayRef<ISD::OutputArg> Outs) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + // FIXME: What about f32 arguments? C promotes them to f64 when calling + // varargs functions. + if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64) + continue; + // The fixed arguments to a varargs function still go in FP registers. + if (Outs[VA.getValNo()].IsFixed) + continue; + + // This floating point argument should be reassigned. + CCValAssign NewVA; + + // Determine the offset into the argument array. + unsigned Offset = 8 * (VA.getLocReg() - SP::D0); + assert(Offset < 16*8 && "Offset out of range, bad register enum?"); + + if (Offset < 6*8) { + // This argument should go in %i0-%i5. + unsigned IReg = SP::I0 + Offset/8; + // Full register, just bitconvert into i64. + NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), + IReg, MVT::i64, CCValAssign::BCvt); + } else { + // This needs to go to memory, we're out of integer registers. 
+ NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(), + Offset, VA.getLocVT(), VA.getLocInfo()); + } + ArgLocs[i] = NewVA; + } +} + +// Lower a call for the 64-bit ABI. +SDValue +SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64); + + // Get the size of the outgoing arguments stack space requirement. + // The stack offset computed by CC_Sparc64 includes all arguments. + // Called functions expect 6 argument words to exist in the stack frame, used + // or not. + unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()); + + // Keep stack frames 16-byte aligned. + ArgsSize = RoundUpToAlignment(ArgsSize, 16); + + // Varargs calls require special treatment. + if (CLI.IsVarArg) + fixupVariableFloatArgs(ArgLocs, CLI.Outs); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DL); + + // Collect the set of registers to pass to the function and their values. + // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory operations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. 
+ SmallVector<SDValue, 8> MemOpChains; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + const CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + // The custom bit on an i32 argument indicates that it should be + // passed in the high bits of the register. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg, + DAG.getConstant(32, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() && + ArgLocs[i+1].getLocReg() == VA.getLocReg()) { + SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, + CLI.OutVals[i+1]); + Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV); + // Skip the next value, it's already done. + ++i; + } + } + RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg)); + continue; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + // The argument area starts at %fp+BIAS+128 in the callee frame, + // %sp+BIAS+128 in ours. 
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + + Subtarget->getStackPointerBias() + + 128); + PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, + RegsToPass[i].first, RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + SDValue Callee = CLI.Callee; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy()); + else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. 
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DAG.getIntPtrConstant(0, true), InGlue, DL); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments_64. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + DAG.getTarget(), RVLocs, *DAG.getContext()); + RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = toCallerWindow(VA.getLocReg()); + + // When returning 'inreg {i32, i32 }', two consecutive i32 values can + // reside in the same register in the high and low bits. Reuse the + // previous CopyFromReg node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. 
+ switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. + if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -689,11 +1238,14 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { + Subtarget = &TM.getSubtarget<SparcSubtarget>(); // Set up the register classes. addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); + if (Subtarget->is64Bit()) + addRegisterClass(MVT::i64, &SP::I64RegsRegClass); // Turn FP extload into load/fextend setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -703,9 +1255,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. 
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool , MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); + setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); + setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); + setOperationAction(ISD::BlockAddress, getPointerTy(), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -749,11 +1302,25 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + } + // FIXME: There are instructions available for ATOMIC_FENCE // on SparcV8 and later. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + if (!Subtarget->isV9()) { + // SparcV8 does not have FNEGD and FABSD. 
+ setOperationAction(ISD::FNEG, MVT::f64, Custom); + setOperationAction(ISD::FABS, MVT::f64, Custom); + } + setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); @@ -804,7 +1371,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(SP::O6); - if (TM.getSubtarget<SparcSubtarget>().isV9()) + if (Subtarget->isV9()) setOperationAction(ISD::CTPOP, MVT::i32, Legal); setMinFunctionAlignment(2); @@ -818,8 +1385,10 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { case SPISD::CMPICC: return "SPISD::CMPICC"; case SPISD::CMPFCC: return "SPISD::CMPFCC"; case SPISD::BRICC: return "SPISD::BRICC"; + case SPISD::BRXCC: return "SPISD::BRXCC"; case SPISD::BRFCC: return "SPISD::BRFCC"; case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC"; + case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC"; case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC"; case SPISD::Hi: return "SPISD::Hi"; case SPISD::Lo: return "SPISD::Lo"; @@ -835,17 +1404,19 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to /// be zero. Op is expected to be a target specific node. Used by DAG /// combiner. 
-void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void SparcTargetLowering::computeMaskedBitsForTargetNode + (const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { APInt KnownZero2, KnownOne2; KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case SPISD::SELECT_ICC: + case SPISD::SELECT_XCC: case SPISD::SELECT_FCC: DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); @@ -866,7 +1437,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, if (isa<ConstantSDNode>(RHS) && cast<ConstantSDNode>(RHS)->isNullValue() && CC == ISD::SETNE && - ((LHS.getOpcode() == SPISD::SELECT_ICC && + (((LHS.getOpcode() == SPISD::SELECT_ICC || + LHS.getOpcode() == SPISD::SELECT_XCC) && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || (LHS.getOpcode() == SPISD::SELECT_FCC && LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && @@ -881,50 +1453,104 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } } +// Convert to a target node and set target flags. 
+SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + GA->getOffset(), TF); + + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), + CP->getValueType(0), + CP->getAlignment(), + CP->getOffset(), TF); + + if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) + return DAG.getTargetBlockAddress(BA->getBlockAddress(), + Op.getValueType(), + 0, + TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), + ES->getValueType(0), TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. +SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, + unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = getPointerTy(); + + // Handle PIC mode first. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + // This is the pic32 code model, the GOT is known to be smaller than 4GB. 
+ SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(), false, false, false, 0); + } + + // This is one of the absolute code models. + switch(getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + // abs32. + return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + case CodeModel::Medium: { + // abs44. + SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG); + H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32)); + SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG); + L44 = DAG.getNode(SPISD::Lo, DL, VT, L44); + return DAG.getNode(ISD::ADD, DL, VT, H44, L44); + } + case CodeModel::Large: { + // abs64. + SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG); + Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32)); + SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); + } + } +} + SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); - - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return 
DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - const Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); - SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, - getPointerTy()); - SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, - GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, false, 0); + return makeAddress(Op, DAG); +} + +SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Convert the fp value to integer in an FP register. assert(Op.getValueType() == MVT::i32); Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0)); @@ -932,7 +1558,7 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(Op.getOperand(0).getValueType() == MVT::i32); SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); // Convert the int value to FP in an FP register. 
@@ -945,7 +1571,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc, SPCC = ~0U; // If this is a br_cc of a "setcc", and if the setcc got lowered into @@ -954,12 +1580,11 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { // Get the condition flag. SDValue CompareFlag; - if (LHS.getValueType() == MVT::i32) { - EVT VTs[] = { MVT::i32, MVT::Glue }; - SDValue Ops[2] = { LHS, RHS }; - CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); + if (LHS.getValueType().isInteger()) { + CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS); if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); - Opc = SPISD::BRICC; + // 32-bit compares use the icc flags, 64-bit uses the xcc flags. + Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC; } else { CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); @@ -975,7 +1600,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc, SPCC = ~0U; // If this is a select_cc of a "setcc", and if the setcc got lowered into @@ -983,12 +1608,10 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { LookThroughSetCC(LHS, RHS, CC, SPCC); SDValue CompareFlag; - if (LHS.getValueType() == MVT::i32) { - // subcc returns a value - EVT VTs[] = { LHS.getValueType(), MVT::Glue }; - SDValue Ops[2] = { LHS, RHS }; - CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); - Opc = SPISD::SELECT_ICC; + if (LHS.getValueType().isInteger()) { + CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS); + Opc = LHS.getValueType() == MVT::i32 ? 
+ SPISD::SELECT_ICC : SPISD::SELECT_XCC; if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); } else { CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); @@ -1004,16 +1627,18 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo()->setFrameAddressIsTaken(true); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - DebugLoc dl = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Offset = - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), - MVT::i32)); + DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), + DAG.getRegister(SP::I6, TLI.getPointerTy()), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } @@ -1022,39 +1647,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, + SDLoc DL(Node); + SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV), false, false, false, 0); - // Increment the pointer, VAList, to the next vaarg - SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, - DAG.getConstant(VT.getSizeInBits()/8, - MVT::i32)); - // Store the incremented VAList to the legalized pointer - InChain = 
DAG.getStore(VAList.getValue(1), dl, NextPtr, + // Increment the pointer, VAList, to the next vaarg. + SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getIntPtrConstant(VT.getSizeInBits()/8)); + // Store the incremented VAList to the legalized pointer. + InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr, MachinePointerInfo(SV), false, false, 0); - // Load the actual argument out of the pointer VAList, unless this is an - // f64 load. - if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(), - false, false, false, 0); - - // Bit-Convert the value to f64. - SDValue Ops[2] = { - DAG.getNode(ISD::BITCAST, dl, MVT::f64, V), - V.getValue(1) - }; - return DAG.getMergeValues(Ops, 2, dl); + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. + return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + false, false, false, + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8); } static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); // Legalize the chain. SDValue Size = Op.getOperand(1); // Legalize the size. 
- DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned SPReg = SP::O6; SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); @@ -1071,7 +1685,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Chain = DAG.getNode(SPISD::FLUSHW, dl, MVT::Other, DAG.getEntryNode()); return Chain; @@ -1082,7 +1696,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned FrameReg = SP::I6; uint64_t depth = Op.getConstantOperandVal(0); @@ -1108,20 +1722,25 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { return FrameAddr; } -static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); +static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - unsigned RetReg = SP::I7; - + SDLoc dl(Op); uint64_t depth = Op.getConstantOperandVal(0); SDValue RetAddr; - if (depth == 0) + if (depth == 0) { + unsigned RetReg = MF.addLiveIn(SP::I7, + TLI.getRegClassFor(TLI.getPointerTy())); RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT); - else { + } else { + // Need frame address to find return address of the caller. 
+ MFI->setFrameAddressIsTaken(true); + // flush first to make sure the windowed registers' values are in stack SDValue Chain = getFLUSHW(Op, DAG); RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT); @@ -1140,15 +1759,48 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return RetAddr; } +static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + + assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!"); + assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS); + + // Lower fneg/fabs on f64 to fneg/fabs on f32. + // fneg f64 => fneg f32:sub_even, fmov f32:sub_odd. + // fabs f64 => fabs f32:sub_even, fmov f32:sub_odd. + + SDValue SrcReg64 = Op.getOperand(0); + SDValue Hi32 = DAG.getTargetExtractSubreg(SP::sub_even, dl, MVT::f32, + SrcReg64); + SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32, + SrcReg64); + + Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32); + + SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, MVT::f64), 0); + DstReg64 = DAG.getTargetInsertSubreg(SP::sub_even, dl, MVT::f64, + DstReg64, Hi32); + DstReg64 = DAG.getTargetInsertSubreg(SP::sub_odd, dl, MVT::f64, + DstReg64, Lo32); + return DstReg64; +} + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + + case ISD::FNEG: + case ISD::FABS: return LowerF64Op(Op, DAG); + + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Sparc."); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case 
ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); @@ -1256,7 +1908,7 @@ SparcTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass*> SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 09148ea..261c25a 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -19,14 +19,18 @@ #include "llvm/Target/TargetLowering.h" namespace llvm { + class SparcSubtarget; + namespace SPISD { enum { FIRST_NUMBER = ISD::BUILTIN_OP_END, - CMPICC, // Compare two GPR operands, set icc. + CMPICC, // Compare two GPR operands, set icc+xcc. CMPFCC, // Compare two FP operands, set fcc. BRICC, // Branch to dest on icc condition + BRXCC, // Branch to dest on xcc condition (64-bit only). BRFCC, // Branch to dest on fcc condition SELECT_ICC, // Select between two values using the current ICC flags. + SELECT_XCC, // Select between two values using the current XCC flags. SELECT_FCC, // Select between two values using the current FCC flags. Hi, Lo, // Hi/Lo operations, typically on a global address. 
@@ -42,6 +46,7 @@ namespace llvm { } class SparcTargetLowering : public TargetLowering { + const SparcSubtarget *Subtarget; public: SparcTargetLowering(TargetMachine &TM); virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -63,33 +68,65 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerFormalArguments_32(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerFormalArguments_64(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; + SDValue LowerReturn_32(SDValue Chain, + CallingConv::ID CallConv, 
bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const; + SDValue LowerReturn_64(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + SDLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; + SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const; + SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td new file mode 100644 index 0000000..47658ee --- /dev/null +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -0,0 +1,358 @@ +//===-- SparcInstr64Bit.td - 64-bit instructions for Sparc Target ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains instruction definitions and patterns needed for 64-bit +// code generation on SPARC v9. +// +// Some SPARC v9 instructions are defined in SparcInstrInfo.td because they can +// also be used in 32-bit code running on a SPARC v9 CPU. +// +//===----------------------------------------------------------------------===// + +let Predicates = [Is64Bit] in { +// The same integer registers are used for i32 and i64 values. +// When registers hold i32 values, the high bits are don't care. +// This gives us free trunc and anyext. 
+def : Pat<(i64 (anyext i32:$val)), (COPY_TO_REGCLASS $val, I64Regs)>; +def : Pat<(i32 (trunc i64:$val)), (COPY_TO_REGCLASS $val, IntRegs)>; + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// +// 64-bit Shift Instructions. +//===----------------------------------------------------------------------===// +// +// The 32-bit shift instructions are still available. The left shift sll +// instructions shift all 64 bits, but they only accept a 5-bit shift amount. +// +// The srl instructions only shift the low 32 bits and clear the high 32 bits. +// Finally, sra shifts the low 32 bits and sign-extends to 64 bits. + +let Predicates = [Is64Bit] in { + +def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>; +def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>; + +def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>; +def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>; + +defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>; +defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>; +defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>; + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// +// 64-bit Immediates. +//===----------------------------------------------------------------------===// +// +// All 32-bit immediates can be materialized with sethi+or, but 64-bit +// immediates may require more code. There may be a point where it is +// preferable to use a constant pool load instead, depending on the +// microarchitecture. + +// Single-instruction patterns. + +// The ALU instructions want their simm13 operands as i32 immediates. +def as_i32imm : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32); +}]>; +def : Pat<(i64 simm13:$val), (ORri (i64 G0), (as_i32imm $val))>; +def : Pat<(i64 SETHIimm:$val), (SETHIi (HI22 $val))>; + +// Double-instruction patterns. 
+ +// All unsigned i32 immediates can be handled by sethi+or. +def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; +def : Pat<(i64 uimm32:$val), (ORri (SETHIi (HI22 $val)), (LO10 $val))>, + Requires<[Is64Bit]>; + +// All negative i33 immediates can be handled by sethi+xor. +def nimm33 : PatLeaf<(imm), [{ + int64_t Imm = N->getSExtValue(); + return Imm < 0 && isInt<33>(Imm); +}]>; +// Bits 10-31 inverted. Same as assembler's %hix. +def HIX22 : SDNodeXForm<imm, [{ + uint64_t Val = (~N->getZExtValue() >> 10) & ((1u << 22) - 1); + return CurDAG->getTargetConstant(Val, MVT::i32); +}]>; +// Bits 0-9 with ones in bits 10-31. Same as assembler's %lox. +def LOX10 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(~(~N->getZExtValue() & 0x3ff), MVT::i32); +}]>; +def : Pat<(i64 nimm33:$val), (XORri (SETHIi (HIX22 $val)), (LOX10 $val))>, + Requires<[Is64Bit]>; + +// More possible patterns: +// +// (sllx sethi, n) +// (sllx simm13, n) +// +// 3 instrs: +// +// (xor (sllx sethi), simm13) +// (sllx (xor sethi, simm13)) +// +// 4 instrs: +// +// (or sethi, (sllx sethi)) +// (xnor sethi, (sllx sethi)) +// +// 5 instrs: +// +// (or (sllx sethi), (or sethi, simm13)) +// (xnor (sllx sethi), (or sethi, simm13)) +// (or (sllx sethi), (sllx sethi)) +// (xnor (sllx sethi), (sllx sethi)) +// +// Worst case is 6 instrs: +// +// (or (sllx (or sethi, simm13)), (or sethi, simm13)) + +// Bits 42-63, same as assembler's %hh. +def HH22 : SDNodeXForm<imm, [{ + uint64_t Val = (N->getZExtValue() >> 42) & ((1u << 22) - 1); + return CurDAG->getTargetConstant(Val, MVT::i32); +}]>; +// Bits 32-41, same as assembler's %hm. 
+def HM10 : SDNodeXForm<imm, [{ + uint64_t Val = (N->getZExtValue() >> 32) & ((1u << 10) - 1); + return CurDAG->getTargetConstant(Val, MVT::i32); +}]>; +def : Pat<(i64 imm:$val), + (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)), + (ORri (SETHIi (HI22 $val)), (LO10 $val)))>, + Requires<[Is64Bit]>; + + +//===----------------------------------------------------------------------===// +// 64-bit Integer Arithmetic and Logic. +//===----------------------------------------------------------------------===// + +let Predicates = [Is64Bit] in { + +// Register-register instructions. + +def : Pat<(and i64:$a, i64:$b), (ANDrr $a, $b)>; +def : Pat<(or i64:$a, i64:$b), (ORrr $a, $b)>; +def : Pat<(xor i64:$a, i64:$b), (XORrr $a, $b)>; + +def : Pat<(and i64:$a, (not i64:$b)), (ANDNrr $a, $b)>; +def : Pat<(or i64:$a, (not i64:$b)), (ORNrr $a, $b)>; +def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>; + +def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>; +def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>; + +// Add/sub with carry were renamed to addc/subc in SPARC v9. +def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>; +def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>; + +def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>; +def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>; + +def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; + +// Register-immediate instructions. + +def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>; +def : Pat<(or i64:$a, (i64 simm13:$b)), (ORri $a, (as_i32imm $b))>; +def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>; + +def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>; +def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>; + +def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// +// 64-bit Integer Multiply and Divide. 
+//===----------------------------------------------------------------------===// + +let Predicates = [Is64Bit] in { + +def MULXrr : F3_1<2, 0b001001, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "mulx $rs1, $rs2, $rd", + [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>; +def MULXri : F3_2<2, 0b001001, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "mulx $rs1, $i, $rd", + [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>; + +// Division can trap. +let hasSideEffects = 1 in { +def SDIVXrr : F3_1<2, 0b101101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "sdivx $rs1, $rs2, $rd", + [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>; +def SDIVXri : F3_2<2, 0b101101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "sdivx $rs1, $i, $rd", + [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>; + +def UDIVXrr : F3_1<2, 0b001101, + (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2), + "udivx $rs1, $rs2, $rd", + [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>; +def UDIVXri : F3_2<2, 0b001101, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i), + "udivx $rs1, $i, $rd", + [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>; +} // hasSideEffects = 1 + +} // Predicates = [Is64Bit] + + +//===----------------------------------------------------------------------===// +// 64-bit Loads and Stores. +//===----------------------------------------------------------------------===// +// +// All the 32-bit loads and stores are available. The extending loads are sign +// or zero-extending to 64 bits. The LDrr and LDri instructions load 32 bits +// zero-extended to i64. Their mnemonic is lduw in SPARC v9 (Load Unsigned +// Word). +// +// SPARC v9 adds 64-bit loads as well as a sign-extending ldsw i32 loads. + +let Predicates = [Is64Bit] in { + +// 64-bit loads. 
+def LDXrr : F3_1<3, 0b001011, + (outs I64Regs:$dst), (ins MEMrr:$addr), + "ldx [$addr], $dst", + [(set i64:$dst, (load ADDRrr:$addr))]>; +def LDXri : F3_2<3, 0b001011, + (outs I64Regs:$dst), (ins MEMri:$addr), + "ldx [$addr], $dst", + [(set i64:$dst, (load ADDRri:$addr))]>; + +// Extending loads to i64. +def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (zextloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (extloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; + +def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (extloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>; +def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>; + +def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; +def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; +def : Pat<(i64 (extloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>; +def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>; +def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>; + +def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; +def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; +def : Pat<(i64 (extloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>; + +// Sign-extending load of i32 into i64 is a new SPARC v9 instruction. 
+def LDSWrr : F3_1<3, 0b001000, // ldsw is op3 0b001000; 0b001011 encodes ldx.
+                  (outs I64Regs:$dst), (ins MEMrr:$addr),
+                  "ldsw [$addr], $dst",
+                  [(set i64:$dst, (sextloadi32 ADDRrr:$addr))]>;
+def LDSWri : F3_2<3, 0b001000, // Same op3 as LDSWrr; register+immediate form.
+                  (outs I64Regs:$dst), (ins MEMri:$addr),
+                  "ldsw [$addr], $dst",
+                  [(set i64:$dst, (sextloadi32 ADDRri:$addr))]>;
+
+// 64-bit stores.
+def STXrr : F3_1<3, 0b001110,
+                 (outs), (ins MEMrr:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRrr:$addr)]>;
+def STXri : F3_2<3, 0b001110,
+                 (outs), (ins MEMri:$addr, I64Regs:$src),
+                 "stx $src, [$addr]",
+                 [(store i64:$src, ADDRri:$addr)]>;
+
+// Truncating stores from i64 are identical to the i32 stores.
+def : Pat<(truncstorei8 i64:$src, ADDRrr:$addr), (STBrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), (STBri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRrr:$addr), (STHrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr ADDRrr:$addr, $src)>;
+def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri ADDRri:$addr, $src)>;
+
+// store 0, addr -> store %g0, addr
+def : Pat<(store (i64 0), ADDRrr:$dst), (STXrr ADDRrr:$dst, (i64 G0))>;
+def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>;
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
+// 64-bit Conditionals.
+//===----------------------------------------------------------------------===//
+//
+// Flag-setting instructions like subcc and addcc set both icc and xcc flags.
+// The icc flags correspond to the 32-bit result, and the xcc are for the
+// full 64-bit result.
+//
+// We reuse CMPICC SDNodes for compares, but use new BRXCC branch nodes for
+// 64-bit compares. See LowerBR_CC.
+ +let Predicates = [Is64Bit] in { + +let Uses = [ICC] in +def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), + "b$cc %xcc, $dst", + [(SPbrxcc bb:$dst, imm:$cc)]>; + +// Conditional moves on %xcc. +let Uses = [ICC], Constraints = "$f = $rd" in { +def MOVXCCrr : Pseudo<(outs IntRegs:$rd), + (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond), + "mov$cond %xcc, $rs2, $rd", + [(set i32:$rd, + (SPselectxcc i32:$rs2, i32:$f, imm:$cond))]>; +def MOVXCCri : Pseudo<(outs IntRegs:$rd), + (ins i32imm:$i, IntRegs:$f, CCOp:$cond), + "mov$cond %xcc, $i, $rd", + [(set i32:$rd, + (SPselectxcc simm11:$i, i32:$f, imm:$cond))]>; +def FMOVS_XCC : Pseudo<(outs FPRegs:$rd), + (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond), + "fmovs$cond %xcc, $rs2, $rd", + [(set f32:$rd, + (SPselectxcc f32:$rs2, f32:$f, imm:$cond))]>; +def FMOVD_XCC : Pseudo<(outs DFPRegs:$rd), + (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), + "fmovd$cond %xcc, $rs2, $rd", + [(set f64:$rd, + (SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>; +} // Uses, Constraints + +def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond), + (MOVXCCrr $t, $f, imm:$cond)>; +def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond), + (MOVXCCri (as_i32imm $t), $f, imm:$cond)>; + +def : Pat<(SPselecticc i64:$t, i64:$f, imm:$cond), + (MOVICCrr $t, $f, imm:$cond)>; +def : Pat<(SPselecticc (i64 simm11:$t), i64:$f, imm:$cond), + (MOVICCri (as_i32imm $t), $f, imm:$cond)>; + +def : Pat<(SPselectfcc i64:$t, i64:$f, imm:$cond), + (MOVFCCrr $t, $f, imm:$cond)>; +def : Pat<(SPselectfcc (i64 simm11:$t), i64:$f, imm:$cond), + (MOVFCCri (as_i32imm $t), $f, imm:$cond)>; + +} // Predicates = [Is64Bit] diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index dce3312..6cdf6bc 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -7,14 +7,15 @@ // //===----------------------------------------------------------------------===// -class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : 
Instruction { +class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> + : Instruction { field bits<32> Inst; let Namespace = "SP"; bits<2> op; let Inst{31-30} = op; // Top two bits are the 'op' field - + dag OutOperandList = outs; dag InOperandList = ins; let AsmString = asmstr; @@ -46,7 +47,7 @@ class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{29-25} = rd; } -class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, +class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern> : F2<outs, ins, asmstr, pattern> { bits<4> cond; bit annul = 0; // currently unused @@ -88,7 +89,7 @@ class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins, let Inst{4-0} = rs2; } -class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins, +class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins, string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> { bits<13> simm13; @@ -111,4 +112,41 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins, let Inst{4-0} = rs2; } +// Shift by register rs2. +class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins, + string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> { + bit x = xVal; // 1 for 64-bit shifts. + bits<5> rs2; + + let op = opVal; + let op3 = op3val; + + let Inst{13} = 0; // i field = 0 + let Inst{12} = x; // extended registers. + let Inst{4-0} = rs2; +} +// Shift by immediate. +class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins, + string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> { + bit x = xVal; // 1 for 64-bit shifts. + bits<6> shcnt; // shcnt32 / shcnt64. + + let op = opVal; + let op3 = op3val; + + let Inst{13} = 1; // i field = 1 + let Inst{12} = x; // extended registers. + let Inst{5-0} = shcnt; +} + +// Define rr and ri shift instructions with patterns. 
+multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode, + ValueType VT, RegisterClass RC> { + def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2), + !strconcat(OpcStr, " $rs, $rs2, $rd"), + [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>; + def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt), + !strconcat(OpcStr, " $rs, $shcnt, $rd"), + [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>; +} diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 39d7329..6c14bc9 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -17,7 +17,9 @@ #include "SparcSubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -29,7 +31,7 @@ using namespace llvm; SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), - RI(ST, *this), Subtarget(ST) { + RI(ST), Subtarget(ST) { } /// isLoadFromStackSlot - If the specified machine instruction is a direct @@ -40,6 +42,7 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == SP::LDri || + MI->getOpcode() == SP::LDXri || MI->getOpcode() == SP::LDFri || MI->getOpcode() == SP::LDDFri) { if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && @@ -59,6 +62,7 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == SP::STri || + MI->getOpcode() == SP::STXri || MI->getOpcode() == SP::STFri || MI->getOpcode() == SP::STDFri) { if 
(MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && @@ -112,18 +116,6 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) llvm_unreachable("Invalid cond code"); } -MachineInstr * -SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc dl) const { - MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - - bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -139,15 +131,15 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (I->isDebugValue()) continue; - //When we see a non-terminator, we are done + // When we see a non-terminator, we are done. if (!isUnpredicatedTerminator(I)) break; - //Terminator is not a branch + // Terminator is not a branch. if (!I->isBranch()) return true; - //Handle Unconditional branches + // Handle Unconditional branches. if (I->getOpcode() == SP::BA) { UnCondBrIter = I; @@ -176,7 +168,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned Opcode = I->getOpcode(); if (Opcode != SP::BCOND && Opcode != SP::FBCOND) - return true; //Unknown Opcode + return true; // Unknown Opcode. SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm(); @@ -185,7 +177,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (AllowModify && UnCondBrIter != MBB.end() && MBB.isLayoutSuccessor(TargetBB)) { - //Transform the code + // Transform the code // // brCC L1 // ba L2 @@ -219,8 +211,8 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } - //FIXME: Handle subsequent conditional branches - //For now, we can't handle multiple conditional branches + // FIXME: Handle subsequent conditional branches. + // For now, we can't handle multiple conditional branches. 
return true; } return false; @@ -241,7 +233,7 @@ SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, return 1; } - //Conditional branch + // Conditional branch unsigned CC = Cond[0].getImm(); if (IsIntegerCC(CC)) @@ -287,10 +279,28 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) - BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else + else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { + if (Subtarget.isV9()) { + BuildMI(MBB, I, DL, get(SP::FMOVD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + // Use two FMOVS instructions. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstr *MovMI = 0; + unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd}; + for (unsigned i = 0; i != 2; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); + unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); + assert(Dst && Src && "Bad sub-register"); + + MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src); + } + // Add implicit super-register defs and kills to the last MovMI. + MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI); + } + } else llvm_unreachable("Impossible reg-to-reg copy"); } @@ -302,16 +312,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = *MF->getFrameInfo(); + MachineMemOperand *MMO = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + // On the order of operands here: think "[FrameIdx + 0] = SrcReg". 
- if (RC == &SP::IntRegsRegClass) + if (RC == &SP::I64RegsRegClass) + BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::DFPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else llvm_unreachable("Can't store this register to stack slot"); } @@ -324,12 +345,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == &SP::IntRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0); + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = *MF->getFrameInfo(); + MachineMemOperand *MMO = + MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + + if (RC == &SP::I64RegsRegClass) + BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); + else if (RC == &SP::IntRegsRegClass) + BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::DFPRegsRegClass) - BuildMI(MBB, I, DL, get(SP::LDDFri), 
DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else llvm_unreachable("Can't load this register from stack slot"); } diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 204f698..d0b220b 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -53,7 +53,7 @@ public: /// any side effects other than loading from the stack slot. virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -62,14 +62,6 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - /// emitFrameIndexDebugValue - Emit a target-dependent form of - /// DBG_VALUE encoding the address of a frame index. 
- virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc dl) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -86,7 +78,7 @@ public: MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -98,7 +90,7 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - + unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 90b698d..d4cac4d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -21,6 +21,12 @@ include "SparcInstrFormats.td" // Feature predicates. //===----------------------------------------------------------------------===// +// True when generating 32-bit code. +def Is32Bit : Predicate<"!Subtarget.is64Bit()">; + +// True when generating 64-bit code. This also implies HasV9. +def Is64Bit : Predicate<"Subtarget.is64Bit()">; + // HasV9 - This predicate is true when the target processor supports V9 // instructions. Note that the machine may be running in 32-bit mode. def HasV9 : Predicate<"Subtarget.isV9()">; @@ -58,22 +64,21 @@ def HI22 : SDNodeXForm<imm, [{ }]>; def SETHIimm : PatLeaf<(imm), [{ - return (((unsigned)N->getZExtValue() >> 10) << 10) == - (unsigned)N->getZExtValue(); + return isShiftedUInt<22, 10>(N->getZExtValue()); }], HI22>; // Addressing modes. 
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>; +def ADDRrr : ComplexPattern<iPTR, 2, "SelectADDRrr", [], []>; +def ADDRri : ComplexPattern<iPTR, 2, "SelectADDRri", [frameindex], []>; // Address operands -def MEMrr : Operand<i32> { +def MEMrr : Operand<iPTR> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); + let MIOperandInfo = (ops ptr_rc, ptr_rc); } -def MEMri : Operand<i32> { +def MEMri : Operand<iPTR> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops IntRegs, i32imm); + let MIOperandInfo = (ops ptr_rc, i32imm); } // Branch targets have OtherVT type. @@ -84,9 +89,11 @@ def calltarget : Operand<i32>; let PrintMethod = "printCCOperand" in def CCOp : Operand<i32>; -def SDTSPcmpfcc : +def SDTSPcmpicc : +SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; +def SDTSPcmpfcc : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; -def SDTSPbrcc : +def SDTSPbrcc : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; def SDTSPselectcc : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>; @@ -95,9 +102,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; def SDTSPITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; -def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>; +def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>; def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>; def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; +def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>; @@ -107,6 +115,7 @@ def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>; def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>; def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>; 
+def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>; def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>; // These are target-independent nodes, but have target-specific formats. @@ -179,20 +188,20 @@ def FCC_O : FCC_VAL<29>; // Ordered /// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot. multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> { - def rr : F3_1<2, Op3Val, + def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; def ri : F3_2<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>; + [(set i32:$dst, (OpNode i32:$b, (i32 simm13:$c)))]>; } /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no /// pattern. multiclass F3_12np<string OpcStr, bits<6> Op3Val> { - def rr : F3_1<2, Op3Val, + def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), []>; def ri : F3_2<2, Op3Val, @@ -236,40 +245,24 @@ let hasSideEffects = 1, mayStore = 1 in { def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), "unimp $val", []>; -// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the -// fpmover pass. -let Predicates = [HasNoV9] in { // Only emit these in V8 mode. - def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpMOVD $src, $dst", []>; - def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpNEGD $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; - def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpABSD $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; -} - // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. 
This has to handle all // permutations of selection between i32/f32/f64 on ICC and FCC. - // Expanded after instruction selection. -let Uses = [ICC], usesCustomInserter = 1 in { +// Expanded after instruction selection. +let Uses = [ICC], usesCustomInserter = 1 in { def SELECT_CC_Int_ICC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", - [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_ICC PSEUDO!", - [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", - [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>; } let usesCustomInserter = 1, Uses = [FCC] in { @@ -277,19 +270,16 @@ let usesCustomInserter = 1, Uses = [FCC] in { def SELECT_CC_Int_FCC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_FCC PSEUDO!", - [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_FCC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_FCC PSEUDO!", - [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_FCC PSEUDO!", - [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>; } @@ -309,111 +299,111 @@ let isReturn = 
1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { def LDSBrr : F3_1<3, 0b001001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>; def LDSBri : F3_2<3, 0b001001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>; def LDSHrr : F3_1<3, 0b001010, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>; def LDSHri : F3_2<3, 0b001010, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>; def LDUBrr : F3_1<3, 0b000001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>; def LDUBri : F3_2<3, 0b000001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>; def LDUHrr : F3_1<3, 0b000010, (outs IntRegs:$dst), (ins MEMrr:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>; def LDUHri : F3_2<3, 0b000010, (outs IntRegs:$dst), (ins MEMri:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>; def LDrr : F3_1<3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRrr:$addr))]>; + [(set i32:$dst, (load ADDRrr:$addr))]>; def LDri : F3_2<3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRri:$addr))]>; + [(set i32:$dst, (load ADDRri:$addr))]>; // Section B.2 - Load 
Floating-point Instructions, p. 92 def LDFrr : F3_1<3, 0b100000, (outs FPRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f32:$dst, (load ADDRrr:$addr))]>; def LDFri : F3_2<3, 0b100000, (outs FPRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRri:$addr))]>; + [(set f32:$dst, (load ADDRri:$addr))]>; def LDDFrr : F3_1<3, 0b100011, (outs DFPRegs:$dst), (ins MEMrr:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f64:$dst, (load ADDRrr:$addr))]>; def LDDFri : F3_2<3, 0b100011, (outs DFPRegs:$dst), (ins MEMri:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRri:$addr))]>; + [(set f64:$dst, (load ADDRri:$addr))]>; // Section B.4 - Store Integer Instructions, p. 95 def STBrr : F3_1<3, 0b000101, (outs), (ins MEMrr:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei8 i32:$src, ADDRrr:$addr)]>; def STBri : F3_2<3, 0b000101, (outs), (ins MEMri:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei8 i32:$src, ADDRri:$addr)]>; def STHrr : F3_1<3, 0b000110, (outs), (ins MEMrr:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei16 i32:$src, ADDRrr:$addr)]>; def STHri : F3_2<3, 0b000110, (outs), (ins MEMri:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei16 i32:$src, ADDRri:$addr)]>; def STrr : F3_1<3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRrr:$addr)]>; + [(store i32:$src, ADDRrr:$addr)]>; def STri : F3_2<3, 0b000100, (outs), (ins MEMri:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRri:$addr)]>; + [(store i32:$src, ADDRri:$addr)]>; // Section B.5 - Store Floating-point Instructions, p. 
97 def STFrr : F3_1<3, 0b100100, (outs), (ins MEMrr:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRrr:$addr)]>; + [(store f32:$src, ADDRrr:$addr)]>; def STFri : F3_2<3, 0b100100, (outs), (ins MEMri:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRri:$addr)]>; + [(store f32:$src, ADDRri:$addr)]>; def STDFrr : F3_1<3, 0b100111, (outs), (ins MEMrr:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRrr:$addr)]>; + [(store f64:$src, ADDRrr:$addr)]>; def STDFri : F3_2<3, 0b100111, (outs), (ins MEMri:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRri:$addr)]>; + [(store f64:$src, ADDRri:$addr)]>; // Section B.9 - SETHI Instruction, p. 104 def SETHIi: F2_1<0b100, (outs IntRegs:$dst), (ins i32imm:$src), "sethi $src, $dst", - [(set IntRegs:$dst, SETHIimm:$src)]>; + [(set i32:$dst, SETHIimm:$src)]>; // Section B.10 - NOP Instruction, p. 105 // (It's a special case of SETHI) @@ -426,7 +416,7 @@ defm AND : F3_12<"and", 0b000001, and>; def ANDNrr : F3_1<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "andn $b, $c, $dst", - [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (and i32:$b, (not i32:$c)))]>; def ANDNri : F3_2<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "andn $b, $c, $dst", []>; @@ -436,7 +426,7 @@ defm OR : F3_12<"or", 0b000010, or>; def ORNrr : F3_1<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "orn $b, $c, $dst", - [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (or i32:$b, (not i32:$c)))]>; def ORNri : F3_2<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "orn $b, $c, $dst", []>; @@ -445,7 +435,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>; def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "xnor $b, $c, $dst", - [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>; + [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>; def XNORri : 
F3_2<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "xnor $b, $c, $dst", []>; @@ -462,9 +452,9 @@ defm ADD : F3_12<"add", 0b000000, add>; def LEA_ADDri : F3_2<2, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "add ${addr:arith}, $dst", - [(set IntRegs:$dst, ADDRri:$addr)]>; + [(set iPTR:$dst, ADDRri:$addr)]>; -let Defs = [ICC] in +let Defs = [ICC] in defm ADDCC : F3_12<"addcc", 0b010000, addc>; let Uses = [ICC] in @@ -472,14 +462,24 @@ let Uses = [ICC] in // Section B.15 - Subtract Instructions, p. 110 defm SUB : F3_12 <"sub" , 0b000100, sub>; -let Uses = [ICC] in +let Uses = [ICC] in defm SUBX : F3_12 <"subx" , 0b001100, sube>; -let Defs = [ICC] in - defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; +let Defs = [ICC] in { + defm SUBCC : F3_12 <"subcc", 0b010100, subc>; + + def CMPrr : F3_1<2, 0b010100, + (outs), (ins IntRegs:$b, IntRegs:$c), + "cmp $b, $c", + [(SPcmpicc i32:$b, i32:$c)]>; + def CMPri : F3_1<2, 0b010100, + (outs), (ins IntRegs:$b, i32imm:$c), + "cmp $b, $c", + [(SPcmpicc i32:$b, (i32 simm13:$c))]>; +} let Uses = [ICC], Defs = [ICC] in - def SUBXCCrr: F3_1<2, 0b011100, + def SUBXCCrr: F3_1<2, 0b011100, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "subxcc $b, $c, $dst", []>; @@ -515,6 +515,20 @@ let isBarrier = 1 in "ba $dst", [(br bb:$dst)]>; +// Indirect branch instructions. +let isTerminator = 1, isBarrier = 1, + hasDelaySlot = 1, isBranch =1, + isIndirectBranch = 1 in { + def BINDrr : F3_1<2, 0b111000, + (outs), (ins MEMrr:$ptr), + "jmp $ptr", + [(brind ADDRrr:$ptr)]>; + def BINDri : F3_2<2, 0b111000, + (outs), (ins MEMri:$ptr), + "jmp $ptr", + [(brind ADDRri:$ptr)]>; +} + // FIXME: the encoding for the JIT should look at the condition field. 
let Uses = [ICC] in def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), @@ -552,7 +566,7 @@ let Uses = [O6], let op = 1; let Inst{29-0} = disp; } - + // indirect calls def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr, variable_ops), @@ -565,7 +579,7 @@ let Uses = [O6], } // Section B.28 - Read State Register Instructions -let Uses = [Y] in +let Uses = [Y] in def RDY : F3_1<2, 0b101000, (outs IntRegs:$dst), (ins), "rd %y, $dst", []>; @@ -584,7 +598,7 @@ def FITOS : F3_3<2, 0b110100, 0b011000100, (outs FPRegs:$dst), (ins FPRegs:$src), "fitos $src, $dst", [(set FPRegs:$dst, (SPitof FPRegs:$src))]>; -def FITOD : F3_3<2, 0b110100, 0b011001000, +def FITOD : F3_3<2, 0b110100, 0b011001000, (outs DFPRegs:$dst), (ins FPRegs:$src), "fitod $src, $dst", [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>; @@ -600,38 +614,38 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010, [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>; // Convert between Floating-point Formats Instructions, p. 143 -def FSTOD : F3_3<2, 0b110100, 0b011001001, +def FSTOD : F3_3<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", - [(set DFPRegs:$dst, (fextend FPRegs:$src))]>; + [(set f64:$dst, (fextend f32:$src))]>; def FDTOS : F3_3<2, 0b110100, 0b011000110, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtos $src, $dst", - [(set FPRegs:$dst, (fround DFPRegs:$src))]>; + [(set f32:$dst, (fround f64:$src))]>; // Floating-point Move Instructions, p. 
144 def FMOVS : F3_3<2, 0b110100, 0b000000001, (outs FPRegs:$dst), (ins FPRegs:$src), "fmovs $src, $dst", []>; -def FNEGS : F3_3<2, 0b110100, 0b000000101, +def FNEGS : F3_3<2, 0b110100, 0b000000101, (outs FPRegs:$dst), (ins FPRegs:$src), "fnegs $src, $dst", - [(set FPRegs:$dst, (fneg FPRegs:$src))]>; -def FABSS : F3_3<2, 0b110100, 0b000001001, + [(set f32:$dst, (fneg f32:$src))]>; +def FABSS : F3_3<2, 0b110100, 0b000001001, (outs FPRegs:$dst), (ins FPRegs:$src), "fabss $src, $dst", - [(set FPRegs:$dst, (fabs FPRegs:$src))]>; + [(set f32:$dst, (fabs f32:$src))]>; // Floating-point Square Root Instructions, p.145 -def FSQRTS : F3_3<2, 0b110100, 0b000101001, +def FSQRTS : F3_3<2, 0b110100, 0b000101001, (outs FPRegs:$dst), (ins FPRegs:$src), "fsqrts $src, $dst", - [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>; -def FSQRTD : F3_3<2, 0b110100, 0b000101010, + [(set f32:$dst, (fsqrt f32:$src))]>; +def FSQRTD : F3_3<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", - [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>; + [(set f64:$dst, (fsqrt f64:$src))]>; @@ -639,42 +653,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010, def FADDS : F3_3<2, 0b110100, 0b001000001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fadds $src1, $src2, $dst", - [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>; def FADDD : F3_3<2, 0b110100, 0b001000010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "faddd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>; def FSUBS : F3_3<2, 0b110100, 0b001000101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsubs $src1, $src2, $dst", - [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>; def FSUBD : F3_3<2, 0b110100, 0b001000110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fsubd $src1, $src2, $dst", - 
[(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>; // Floating-point Multiply and Divide Instructions, p. 147 def FMULS : F3_3<2, 0b110100, 0b001001001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fmuls $src1, $src2, $dst", - [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>; def FMULD : F3_3<2, 0b110100, 0b001001010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>; def FSMULD : F3_3<2, 0b110100, 0b001101001, (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1), - (fextend FPRegs:$src2)))]>; + [(set f64:$dst, (fmul (fextend f32:$src1), + (fextend f32:$src2)))]>; def FDIVS : F3_3<2, 0b110100, 0b001001101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fdivs $src1, $src2, $dst", - [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>; def FDIVD : F3_3<2, 0b110100, 0b001001110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fdivd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>; // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. 
@@ -685,11 +699,11 @@ let Defs = [FCC] in { def FCMPS : F3_3<2, 0b110101, 0b001010001, (outs), (ins FPRegs:$src1, FPRegs:$src2), "fcmps $src1, $src2\n\tnop", - [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; + [(SPcmpfcc f32:$src1, f32:$src2)]>; def FCMPD : F3_3<2, 0b110101, 0b001010010, (outs), (ins DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", - [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; + [(SPcmpfcc f64:$src1, f64:$src2)]>; } //===----------------------------------------------------------------------===// @@ -697,59 +711,51 @@ let Defs = [FCC] in { //===----------------------------------------------------------------------===// // V9 Conditional Moves. -let Predicates = [HasV9], Constraints = "$T = $dst" in { +let Predicates = [HasV9], Constraints = "$f = $rd" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. let Uses = [ICC] in { def MOVICCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), + "mov$cc %icc, $rs2, $rd", + [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>; def MOVICCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), + "mov$cc %icc, $i, $rd", + [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>; } let Uses = [FCC] in { def MOVFCCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), + "mov$cc %fcc0, $rs2, $rd", + [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, 
imm:$cc))]>; def MOVFCCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), + "mov$cc %fcc0, $i, $rd", + [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>; } let Uses = [ICC] in { def FMOVS_ICC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %icc, $F, $dst", - [(set FPRegs:$dst, - (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), + "fmovs$cc %icc, $rs2, $rd", + [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>; def FMOVD_ICC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %icc, $F, $dst", - [(set DFPRegs:$dst, - (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), + "fmovd$cc %icc, $rs2, $rd", + [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>; } let Uses = [FCC] in { def FMOVS_FCC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %fcc0, $F, $dst", - [(set FPRegs:$dst, - (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), + "fmovs$cc %fcc0, $rs2, $rd", + [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>; def FMOVD_FCC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %fcc0, $F, $dst", - [(set DFPRegs:$dst, - (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), + "fmovd$cc %fcc0, $rs2, $rd", + [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>; } } @@ -759,23 +765,23 @@ let Predicates = [HasV9] in { def FMOVD : F3_3<2, 0b110100, 0b000000010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fmovd $src, $dst", []>; - def FNEGD : F3_3<2, 0b110100, 
0b000000110, + def FNEGD : F3_3<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; - def FABSD : F3_3<2, 0b110100, 0b000001010, + [(set f64:$dst, (fneg f64:$src))]>; + def FABSD : F3_3<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; + [(set f64:$dst, (fabs f64:$src))]>; } // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear // the top 32-bits before using it. To do this clearing, we use a SLLri X,0. -def POPCrr : F3_1<2, 0b101110, +def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$dst), (ins IntRegs:$src), "popc $src, $dst", []>, Requires<[HasV9]>; -def : Pat<(ctpop IntRegs:$src), - (POPCrr (SLLri IntRegs:$src, 0))>; +def : Pat<(ctpop i32:$src), + (POPCrr (SLLri $src, 0))>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -783,30 +789,29 @@ def : Pat<(ctpop IntRegs:$src), // Small immediates. def : Pat<(i32 simm13:$val), - (ORri G0, imm:$val)>; + (ORri (i32 G0), imm:$val)>; // Arbitrary immediates. 
def : Pat<(i32 imm:$val), (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; -// subc -def : Pat<(subc IntRegs:$b, IntRegs:$c), - (SUBCCrr IntRegs:$b, IntRegs:$c)>; -def : Pat<(subc IntRegs:$b, simm13:$val), - (SUBCCri IntRegs:$b, imm:$val)>; // Global addresses, constant pool entries def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; -def : Pat<(SPlo tglobaladdr:$in), (ORri G0, tglobaladdr:$in)>; +def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>; def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; -def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>; +def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>; + +// Blockaddress +def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>; +def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>; // Add reg, lo. This is used when taking the addr of a global/constpool entry. -def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)), - (ADDri IntRegs:$r, tglobaladdr:$in)>; -def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)), - (ADDri IntRegs:$r, tconstpool:$in)>; +def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>; +def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>; +def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)), + (ADDri $r, tblockaddress:$in)>; -// Calls: +// Calls: def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; def : Pat<(call texternalsym:$dst), @@ -823,3 +828,9 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>; // zextload bool -> zextload byte def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; + +// store 0, addr -> store %g0, addr +def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>; +def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>; + +include "SparcInstr64Bit.td" diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h 
b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 90c27a4..3783c16 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -28,11 +28,16 @@ namespace llvm { /// SRetReturnReg - Holds the virtual register into which the sret /// argument is passed. unsigned SRetReturnReg; + + /// IsLeafProc - True if the function is a leaf procedure. + bool IsLeafProc; public: SparcMachineFunctionInfo() - : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} explicit SparcMachineFunctionInfo(MachineFunction &MF) - : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } @@ -42,6 +47,9 @@ namespace llvm { unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } }; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 25e90b7..dc97f06 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -13,6 +13,7 @@ #include "SparcRegisterInfo.h" #include "Sparc.h" +#include "SparcMachineFunctionInfo.h" #include "SparcSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -20,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -28,9 +30,12 @@ using namespace llvm; -SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, - const TargetInstrInfo &tii) - : 
SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) { +static cl::opt<bool> +ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), + cl::desc("Reserve application registers (%g2-%g4)")); + +SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st) + : SparcGenRegisterInfo(SP::I7), Subtarget(st) { } const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -43,19 +48,32 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // FIXME: G1 reserved for now for large imm generation by frame code. Reserved.set(SP::G1); - Reserved.set(SP::G2); - Reserved.set(SP::G3); - Reserved.set(SP::G4); + + // G2-G4 can be used in applications. + if (ReserveAppRegisters) { + Reserved.set(SP::G2); + Reserved.set(SP::G3); + Reserved.set(SP::G4); + } + // G5 is not reserved in 64 bit mode. + if (!Subtarget.is64Bit()) + Reserved.set(SP::G5); + Reserved.set(SP::O6); Reserved.set(SP::I6); Reserved.set(SP::I7); Reserved.set(SP::G0); - Reserved.set(SP::G5); Reserved.set(SP::G6); Reserved.set(SP::G7); return Reserved; } +const TargetRegisterClass* +SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; +} + void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, @@ -68,23 +86,33 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Addressable stack objects are accessed using neg.
offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum + 1).getImm(); + int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(FIOperandNum + 1).getImm() + + Subtarget.getStackPointerBias(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + unsigned FramePtr = SP::I6; + if (FuncInfo->isLeafProc()) { + // Use %sp and adjust offset if needed. + FramePtr = SP::O6; + int stackSize = MF.getFrameInfo()->getStackSize(); + Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ; + } // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly // encode it. - MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } else { - // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to + // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to // scavenge a register here instead of reserving G1 all of the time. + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); unsigned OffHi = (unsigned)Offset >> 10U; BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); // Emit G1 = G1 + I6 BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) - .addReg(SP::I6); + .addReg(FramePtr); // Insert: G1+%lo(offset) into the user. 
MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index b53a1ed..6b77d4e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -27,15 +27,17 @@ class Type; struct SparcRegisterInfo : public SparcGenRegisterInfo { SparcSubtarget &Subtarget; - const TargetInstrInfo &TII; - SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii); + SparcRegisterInfo(SparcSubtarget &st); /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 81bff6c..a59c442 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Declarations that describe the Sparc register file +// Declarations that describe the Sparc register file //===----------------------------------------------------------------------===// class SparcReg<string n> : Register<n> { @@ -21,8 +21,8 @@ class SparcCtrlReg<string n>: Register<n> { } let Namespace = "SP" in { -def sub_even : SubRegIndex; -def sub_odd : SubRegIndex; +def sub_even : SubRegIndex<32>; +def sub_odd : SubRegIndex<32, 32>; } // Registers are identified with 5-bit ID numbers. 
@@ -43,7 +43,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { } // Control Registers -def ICC : SparcCtrlReg<"ICC">; +def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code. def FCC : SparcCtrlReg<"FCC">; // Y register @@ -52,68 +52,68 @@ def Y : SparcCtrlReg<"Y">; // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; -def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; +def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>; def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>; -def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; +def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>; def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>; def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>; def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>; -def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; +def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; def O3 : Ri<11, "O3">, DwarfRegNum<[11]>; def O4 : Ri<12, "O4">, DwarfRegNum<[12]>; -def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; +def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; def O6 : Ri<14, "SP">, DwarfRegNum<[14]>; def O7 : Ri<15, "O7">, DwarfRegNum<[15]>; def L0 : Ri<16, "L0">, DwarfRegNum<[16]>; def L1 : Ri<17, "L1">, DwarfRegNum<[17]>; -def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; +def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; def L3 : Ri<19, "L3">, DwarfRegNum<[19]>; def L4 : Ri<20, "L4">, DwarfRegNum<[20]>; -def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; +def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; def L6 : Ri<22, "L6">, DwarfRegNum<[22]>; def L7 : Ri<23, "L7">, DwarfRegNum<[23]>; def I0 : Ri<24, "I0">, DwarfRegNum<[24]>; def I1 : Ri<25, "I1">, DwarfRegNum<[25]>; -def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; +def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; def I3 : Ri<27, "I3">, DwarfRegNum<[27]>; def I4 : Ri<28, "I4">, DwarfRegNum<[28]>; -def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; +def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; def I6 : Ri<30, "FP">, DwarfRegNum<[30]>; def I7 : Ri<31, 
"I7">, DwarfRegNum<[31]>; // Floating-point registers def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; -def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>; +def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>; def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>; def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>; -def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>; +def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>; def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>; def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>; -def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>; +def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>; def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>; def F10 : Rf<10, "F10">, DwarfRegNum<[42]>; -def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; +def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; def F12 : Rf<12, "F12">, DwarfRegNum<[44]>; def F13 : Rf<13, "F13">, DwarfRegNum<[45]>; -def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; +def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; def F15 : Rf<15, "F15">, DwarfRegNum<[47]>; def F16 : Rf<16, "F16">, DwarfRegNum<[48]>; -def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; +def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; def F18 : Rf<18, "F18">, DwarfRegNum<[50]>; def F19 : Rf<19, "F19">, DwarfRegNum<[51]>; -def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; +def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; def F21 : Rf<21, "F21">, DwarfRegNum<[53]>; def F22 : Rf<22, "F22">, DwarfRegNum<[54]>; def F23 : Rf<23, "F23">, DwarfRegNum<[55]>; def F24 : Rf<24, "F24">, DwarfRegNum<[56]>; def F25 : Rf<25, "F25">, DwarfRegNum<[57]>; -def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; +def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; def F27 : Rf<27, "F27">, DwarfRegNum<[59]>; def F28 : Rf<28, "F28">, DwarfRegNum<[60]>; -def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; +def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; def F30 : Rf<30, "F30">, DwarfRegNum<[62]>; def F31 : Rf<31, "F31">, DwarfRegNum<[63]>; @@ -140,21 +140,22 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>; // FIXME: the register order should be defined in terms of the 
preferred // allocation order... // -def IntRegs : RegisterClass<"SP", [i32], 32, - (add L0, L1, L2, L3, L4, L5, L6, - L7, I0, I1, I2, I3, I4, I5, - O0, O1, O2, O3, O4, O5, O7, - G1, - // Non-allocatable regs: - G2, G3, G4, // FIXME: OK for use only in - // applications, not libraries. - O6, // stack ptr - I6, // frame ptr - I7, // return address - G0, // constant zero - G5, G6, G7 // reserved for kernel - )>; +// This register class should not be used to hold i64 values, use the I64Regs +// register class for that. The i64 type is included here to allow i64 patterns +// using the integer instructions. +def IntRegs : RegisterClass<"SP", [i32, i64], 32, + (add (sequence "I%u", 0, 7), + (sequence "G%u", 0, 7), + (sequence "L%u", 0, 7), + (sequence "O%u", 0, 7))>; +// Register class for 64-bit mode, with a 64-bit spill slot size. +// These are the same as the 32-bit registers, so TableGen will consider this +// to be a sub-class of IntRegs. That works out because requiring a 64-bit +// spill slot is a stricter constraint than only requiring a 32-bit spill slot. +def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>; + +// Floating point register classes. 
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index e5b2aeb..f9ce098 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -13,6 +13,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -30,7 +31,7 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, V8DeprecatedInsts(false), IsVIS(false), Is64Bit(is64Bit) { - + // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty()) { @@ -44,3 +45,30 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); } + + +int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { + + if (is64Bit()) { + // All 64-bit stack frames must be 16-byte aligned, and must reserve space + // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. + frameSize += 128; + // Frames with calls must also reserve space for 6 outgoing arguments + // whether they are used or not. LowerCall_64 takes care of that. + assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned"); + } else { + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + frameSize += 92; + + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. 
+ frameSize = RoundUpToAlignment(frameSize, 8); + } + return frameSize; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index a81931b..2bf599d 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -29,7 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool V8DeprecatedInsts; bool IsVIS; bool Is64Bit; - + public: SparcSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64bit); @@ -37,11 +37,11 @@ public: bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; } - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - + bool is64Bit() const { return Is64Bit; } std::string getDataLayout() const { const char *p; @@ -52,6 +52,18 @@ public: } return std::string(p); } + + /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame + /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS]. + int64_t getStackPointerBias() const { + return is64Bit() ? 2047 : 0; + } + + /// Given a actual stack size as determined by FrameInfo, this function + /// returns adjusted framesize which includes space for register window + /// spills and arguments. 
+ int getAdjustedFrameSize(int stackSize) const; + }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 60bceb7..a7355f4 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { @@ -68,7 +69,6 @@ bool SparcPassConfig::addInstSelector() { /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. bool SparcPassConfig::addPreEmitPass(){ - addPass(createSparcFPMoverPass(getSparcTargetMachine())); addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } |
