diff options
Diffstat (limited to 'lib/Target/Sparc')
28 files changed, 773 insertions, 450 deletions
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index efb10db..0ab7a1c 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -10,7 +10,6 @@ add_public_tablegen_target(SparcCommonTableGen) add_llvm_target(SparcCodeGen DelaySlotFiller.cpp - FPMover.cpp SparcAsmPrinter.cpp SparcInstrInfo.cpp SparcISelDAGToDAG.cpp diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 6123773..b93f5e4 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -39,11 +39,10 @@ namespace { /// layout, etc. /// TargetMachine &TM; - const TargetInstrInfo *TII; static char ID; - Filler(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } + Filler(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm) { } virtual const char *getPassName() const { return "SPARC Delay Slot Filler"; @@ -61,8 +60,9 @@ namespace { bool isDelayFiller(MachineBasicBlock &MBB, MachineBasicBlock::iterator candidate); - void insertCallUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegUses); + void insertCallDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); void insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegDefs, @@ -81,6 +81,9 @@ namespace { bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); + bool tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + }; char Filler::ID = 0; } // end of anonymous namespace @@ -99,29 +102,45 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->hasDelaySlot()) { - MachineBasicBlock::iterator D = MBB.end(); - MachineBasicBlock::iterator J = I; - - if (!DisableDelaySlotFiller) - D = findDelayInstr(MBB, I); - - ++FilledSlots; - Changed = true; - - if (D == MBB.end()) - BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); - else - MBB.splice(++J, &MBB, D); - unsigned structSize = 0; - if (needsUnimp(I, structSize)) { - MachineBasicBlock::iterator J = I; - ++J; //skip the delay filler. - BuildMI(MBB, ++J, I->getDebugLoc(), - TII->get(SP::UNIMP)).addImm(structSize); - } + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + MachineBasicBlock::iterator MI = I; + ++I; + + // If MI is restore, try combining it with previous inst. + if (!DisableDelaySlotFiller && + (MI->getOpcode() == SP::RESTORErr + || MI->getOpcode() == SP::RESTOREri)) { + Changed |= tryCombineRestoreWithPrevInst(MBB, MI); + continue; + } + + // If MI has no delay slot, skip. + if (!MI->hasDelaySlot()) + continue; + + MachineBasicBlock::iterator D = MBB.end(); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB, MI); + + ++FilledSlots; + Changed = true; + + const TargetInstrInfo *TII = TM.getInstrInfo(); + if (D == MBB.end()) + BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP)); + else + MBB.splice(I, &MBB, D); + + unsigned structSize = 0; + if (needsUnimp(MI, structSize)) { + MachineBasicBlock::iterator J = MI; + ++J; // skip the delay filler. + assert (J != MBB.end() && "MI needs a delay instruction."); + BuildMI(MBB, ++J, I->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); } + } return Changed; } @@ -134,28 +153,34 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, bool sawLoad = false; bool sawStore = false; - MachineBasicBlock::iterator I = slot; + if (slot == MBB.begin()) + return MBB.end(); if (slot->getOpcode() == SP::RET) return MBB.end(); if (slot->getOpcode() == SP::RETL) { - --I; - if (I->getOpcode() != SP::RESTORErr) - return MBB.end(); - //change retl to ret - slot->setDesc(TII->get(SP::RET)); - return I; + MachineBasicBlock::iterator J = slot; + --J; + + if (J->getOpcode() == SP::RESTORErr + || J->getOpcode() == SP::RESTOREri) { + // change retl to ret. + slot->setDesc(TM.getInstrInfo()->get(SP::RET)); + return J; + } } - //Call's delay filler can def some of call's uses. + // Call's delay filler can def some of call's uses. if (slot->isCall()) - insertCallUses(slot, RegUses); + insertCallDefsUses(slot, RegDefs, RegUses); else insertDefsUses(slot, RegDefs, RegUses); bool done = false; + MachineBasicBlock::iterator I = slot; + while (!done) { done = (I == MBB.begin()); @@ -216,12 +241,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, unsigned Reg = MO.getReg(); if (MO.isDef()) { - //check whether Reg is defined or used before delay slot. + // check whether Reg is defined or used before delay slot. if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) return true; } if (MO.isUse()) { - //check whether Reg is defined before delay slot. + // check whether Reg is defined before delay slot. if (IsRegInSet(RegDefs, Reg)) return true; } @@ -230,9 +255,12 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, } -void Filler::insertCallUses(MachineBasicBlock::iterator MI, - SmallSet<unsigned, 32>& RegUses) +void Filler::insertCallDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses) { + // Call defines o7, which is visible to the instruction in delay slot. + RegDefs.insert(SP::O7); switch(MI->getOpcode()) { default: llvm_unreachable("Unknown opcode."); @@ -255,7 +283,7 @@ void Filler::insertCallUses(MachineBasicBlock::iterator MI, } } -//Insert Defs and Uses of MI into the sets RegDefs and RegUses. +// Insert Defs and Uses of MI into the sets RegDefs and RegUses. void Filler::insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegDefs, SmallSet<unsigned, 32>& RegUses) @@ -270,13 +298,17 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, continue; if (MO.isDef()) RegDefs.insert(Reg); - if (MO.isUse()) + if (MO.isUse()) { + // Implicit register uses of retl are return values and + // retl does not use them. + if (MO.isImplicit() && MI->getOpcode() == SP::RETL) + continue; RegUses.insert(Reg); - + } } } -//returns true if the Reg or its alias is in the RegSet. +// returns true if the Reg or its alias is in the RegSet. bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { // Check Reg and all aliased Registers. @@ -318,3 +350,142 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) StructSize = MO.getImm(); return true; } + +static bool combineRestoreADD(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator AddMI, + const TargetInstrInfo *TII) +{ + // Before: add <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // + // After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = AddMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + // Erase RESTORE. + RestoreMI->eraseFromParent(); + + // Change ADD to RESTORE. + AddMI->setDesc(TII->get((AddMI->getOpcode() == SP::ADDrr) + ? SP::RESTORErr + : SP::RESTOREri)); + + // Map the destination register. + AddMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreOR(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator OrMI, + const TargetInstrInfo *TII) +{ + // Before: or <op0>, <op1>, %i[0-7] + // restore %g0, %g0, %i[0-7] + // and <op0> or <op1> is zero, + // + // After : restore <op0>, <op1>, %o[0-7] + + unsigned reg = OrMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + // check whether it is a copy. + if (OrMI->getOpcode() == SP::ORrr + && OrMI->getOperand(1).getReg() != SP::G0 + && OrMI->getOperand(2).getReg() != SP::G0) + return false; + + if (OrMI->getOpcode() == SP::ORri + && OrMI->getOperand(1).getReg() != SP::G0 + && (!OrMI->getOperand(2).isImm() || OrMI->getOperand(2).getImm() != 0)) + return false; + + // Erase RESTORE. + RestoreMI->eraseFromParent(); + + // Change OR to RESTORE. + OrMI->setDesc(TII->get((OrMI->getOpcode() == SP::ORrr) + ? SP::RESTORErr + : SP::RESTOREri)); + + // Map the destination register. + OrMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + + return true; +} + +static bool combineRestoreSETHIi(MachineBasicBlock::iterator RestoreMI, + MachineBasicBlock::iterator SetHiMI, + const TargetInstrInfo *TII) +{ + // Before: sethi imm3, %i[0-7] + // restore %g0, %g0, %g0 + // + // After : restore %g0, (imm3<<10), %o[0-7] + + unsigned reg = SetHiMI->getOperand(0).getReg(); + if (reg < SP::I0 || reg > SP::I7) + return false; + + if (!SetHiMI->getOperand(1).isImm()) + return false; + + int64_t imm = SetHiMI->getOperand(1).getImm(); + + // Is it a 3 bit immediate? + if (!isInt<3>(imm)) + return false; + + // Make it a 13 bit immediate. + imm = (imm << 10) & 0x1FFF; + + assert(RestoreMI->getOpcode() == SP::RESTORErr); + + RestoreMI->setDesc(TII->get(SP::RESTOREri)); + + RestoreMI->getOperand(0).setReg(reg - SP::I0 + SP::O0); + RestoreMI->getOperand(1).setReg(SP::G0); + RestoreMI->getOperand(2).ChangeToImmediate(imm); + + + // Erase the original SETHI. + SetHiMI->eraseFromParent(); + + return true; +} + +bool Filler::tryCombineRestoreWithPrevInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) +{ + // No previous instruction. + if (MBBI == MBB.begin()) + return false; + + // assert that MBBI is a "restore %g0, %g0, %g0". + assert(MBBI->getOpcode() == SP::RESTORErr + && MBBI->getOperand(0).getReg() == SP::G0 + && MBBI->getOperand(1).getReg() == SP::G0 + && MBBI->getOperand(2).getReg() == SP::G0); + + MachineBasicBlock::iterator PrevInst = MBBI; --PrevInst; + + // It cannot combine with a delay filler. + if (isDelayFiller(MBB, PrevInst)) + return false; + + const TargetInstrInfo *TII = TM.getInstrInfo(); + + switch (PrevInst->getOpcode()) { + default: break; + case SP::ADDrr: + case SP::ADDri: return combineRestoreADD(MBBI, PrevInst, TII); break; + case SP::ORrr: + case SP::ORri: return combineRestoreOR(MBBI, PrevInst, TII); break; + case SP::SETHIi: return combineRestoreSETHIi(MBBI, PrevInst, TII); break; + } + // It cannot combine with the previous instruction. + return false; +} diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp deleted file mode 100644 index 1325b98..0000000 --- a/lib/Target/Sparc/FPMover.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- FPMover.cpp - Sparc double-precision floating point move fixer ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Expand FpMOVD/FpABSD/FpNEGD instructions into their single-precision pieces. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "fpmover" -#include "Sparc.h" -#include "SparcSubtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -STATISTIC(NumFpDs , "Number of instructions translated"); -STATISTIC(NoopFpDs, "Number of noop instructions removed"); - -namespace { - struct FPMover : public MachineFunctionPass { - /// Target machine description which we query for reg. names, data - /// layout, etc. - /// - TargetMachine &TM; - - static char ID; - explicit FPMover(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } - - virtual const char *getPassName() const { - return "Sparc Double-FP Move Fixer"; - } - - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - bool runOnMachineFunction(MachineFunction &F); - }; - char FPMover::ID = 0; -} // end of anonymous namespace - -/// createSparcFPMoverPass - Returns a pass that turns FpMOVD -/// instructions into FMOVS instructions -/// -FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) { - return new FPMover(tm); -} - -/// getDoubleRegPair - Given a DFP register, return the even and odd FP -/// registers that correspond to it. -static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, - unsigned &OddReg) { - static const uint16_t EvenHalvesOfPairs[] = { - SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14, - SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30 - }; - static const uint16_t OddHalvesOfPairs[] = { - SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15, - SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31 - }; - static const uint16_t DoubleRegsInOrder[] = { - SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8, - SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15 - }; - for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i) - if (DoubleRegsInOrder[i] == DoubleReg) { - EvenReg = EvenHalvesOfPairs[i]; - OddReg = OddHalvesOfPairs[i]; - return; - } - llvm_unreachable("Can't find reg"); -} - -/// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB. -/// -bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - MachineInstr *MI = I++; - DebugLoc dl = MI->getDebugLoc(); - if (MI->getOpcode() == SP::FpMOVD || MI->getOpcode() == SP::FpABSD || - MI->getOpcode() == SP::FpNEGD) { - Changed = true; - unsigned DestDReg = MI->getOperand(0).getReg(); - unsigned SrcDReg = MI->getOperand(1).getReg(); - if (DestDReg == SrcDReg && MI->getOpcode() == SP::FpMOVD) { - MBB.erase(MI); // Eliminate the noop copy. - ++NoopFpDs; - continue; - } - - unsigned EvenSrcReg = 0, OddSrcReg = 0, EvenDestReg = 0, OddDestReg = 0; - getDoubleRegPair(DestDReg, EvenDestReg, OddDestReg); - getDoubleRegPair(SrcDReg, EvenSrcReg, OddSrcReg); - - const TargetInstrInfo *TII = TM.getInstrInfo(); - if (MI->getOpcode() == SP::FpMOVD) - MI->setDesc(TII->get(SP::FMOVS)); - else if (MI->getOpcode() == SP::FpNEGD) - MI->setDesc(TII->get(SP::FNEGS)); - else if (MI->getOpcode() == SP::FpABSD) - MI->setDesc(TII->get(SP::FABSS)); - else - llvm_unreachable("Unknown opcode!"); - - MI->getOperand(0).setReg(EvenDestReg); - MI->getOperand(1).setReg(EvenSrcReg); - DEBUG(errs() << "FPMover: the modified instr is: " << *MI); - // Insert copy for the other half of the double. - if (DestDReg != SrcDReg) { - MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg) - .addReg(OddSrcReg); - DEBUG(errs() << "FPMover: the inserted instr is: " << *MI); - } - ++NumFpDs; - } - } - return Changed; -} - -bool FPMover::runOnMachineFunction(MachineFunction &F) { - // If the target has V9 instructions, the fp-mover pseudos will never be - // emitted. Avoid a scan of the instructions to improve compile time. - if (TM.getSubtarget<SparcSubtarget>().isV9()) - return false; - - bool Changed = false; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) - Changed |= runOnMachineBasicBlock(*FI); - return Changed; -} diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index fe20d2f..7d54d32 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -28,5 +28,6 @@ has_asmprinter = 1 type = Library name = SparcCodeGen parent = Sparc -required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target +required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc + SparcInfo Support Target add_to_library_groups = Sparc diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 3d4bfdc..5a52abe 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void SparcELFMCAsmInfo::anchor() { } -SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { +SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); if (TheTriple.getArch() == Triple::sparcv9) { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index f0e1354..621e8ff 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -18,12 +18,11 @@ namespace llvm { class StringRef; - class Target; class SparcELFMCAsmInfo : public MCAsmInfo { virtual void anchor(); public: - explicit SparcELFMCAsmInfo(const Target &T, StringRef TT); + explicit SparcELFMCAsmInfo(StringRef TT); }; } // namespace llvm diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt index b4991fe..34e68cf 100644 --- a/lib/Target/Sparc/README.txt +++ b/lib/Target/Sparc/README.txt @@ -38,7 +38,7 @@ t1: 1) should be replaced with a brz in V9 mode. -* Same as above, but emit conditional move on register zero (p192) in V9 +* Same as above, but emit conditional move on register zero (p192) in V9 mode. Testcase: int %t1(int %a, int %b) { @@ -47,13 +47,15 @@ int %t1(int %a, int %b) { ret int %D } -* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling +* Emit MULX/[SU]DIVX instructions in V9 mode instead of fiddling with the Y register, if they are faster. * Codegen bswap(load)/store(bswap) -> load/store ASI -* Implement frame pointer elimination, e.g. eliminate save/restore for +* Implement frame pointer elimination, e.g. eliminate save/restore for leaf fns. * Fill delay slots * Implement JIT support + +* Use %g0 directly to materialize 0. No instruction is required. diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index ce6ae17..98563db 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -26,7 +26,6 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); - FunctionPass *createSparcFPMoverPass(TargetMachine &TM); } // end namespace llvm; @@ -51,7 +50,7 @@ namespace llvm { ICC_NEG = 6 , // Negative ICC_VC = 15 , // Overflow Clear ICC_VS = 7 , // Overflow Set - + //FCC_A = 8+16, // Always //FCC_N = 0+16, // Never FCC_U = 7+16, // Unordered @@ -70,7 +69,7 @@ namespace llvm { FCC_O = 15+16 // Ordered }; } - + inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { switch (CC) { case SPCC::ICC_NE: return "ne"; diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 611f8e8..d42c40f 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -19,7 +19,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // SPARC Subtarget features. // - + def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">; diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 108eb90..b538d5c 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -60,7 +60,7 @@ namespace { raw_ostream &O); bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS); - + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; @@ -120,6 +120,9 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, case MachineOperand::MO_GlobalAddress: O << *Mang->getSymbol(MO.getGlobal()); break; + case MachineOperand::MO_BlockAddress: + O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; case MachineOperand::MO_ExternalSymbol: O << MO.getSymbolName(); break; @@ -164,7 +167,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, case MachineOperand::MO_Register: assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Operand is not a physical register "); - assert(MO.getReg() != SP::O7 && + assert(MO.getReg() != SP::O7 && "%o7 is assigned as destination for getpcx!"); operand = "%" + StringRef(getRegisterName(MO.getReg())).lower(); break; @@ -177,15 +180,15 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n' ; O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; - O << "\tor\t" << operand + O << "\tor\t" << operand << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n'; - O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; - + O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; + return true; } @@ -243,19 +246,19 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) return false; - + // If there isn't exactly one predecessor, it can't be a fall through. MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; - + // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *PI; - + if (!Pred->isLayoutSuccessor(MBB)) return false; - + // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); while (I != Pred->begin() && !(--I)->isTerminator()) @@ -273,7 +276,7 @@ getDebugValueLocation(const MachineInstr *MI) const { } // Force static initialization. -extern "C" void LLVMInitializeSparcAsmPrinter() { +extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); } diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index 54784e0..a181bcf 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -16,7 +16,7 @@ //===----------------------------------------------------------------------===// def CC_Sparc32 : CallingConv<[ - //Custom assign SRet to [sp+64]. + // Custom assign SRet to [sp+64]. CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>, // i32 f32 arguments get passed in integer registers if there is space. CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 7874240..7e91bc3 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -26,7 +26,16 @@ using namespace llvm; +static cl::opt<bool> +DisableLeafProc("disable-sparc-leaf-proc", + cl::init(false), + cl::desc("Disable Sparc leaf procedure optimization."), + cl::Hidden); + + void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); const SparcInstrInfo &TII = @@ -37,31 +46,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); - if (SubTarget.is64Bit()) { - // All 64-bit stack frames must be 16-byte aligned, and must reserve space - // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. - NumBytes += 128; - // Frames with calls must also reserve space for 6 outgoing arguments - // whether they are used or not. LowerCall_64 takes care of that. - assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned"); - } else { - // Emit the correct save instruction based on the number of bytes in - // the frame. Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; - - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = RoundUpToAlignment(NumBytes, 8); + unsigned SAVEri = SP::SAVEri; + unsigned SAVErr = SP::SAVErr; + if (FuncInfo->isLeafProc()) { + if (NumBytes == 0) + return; + SAVEri = SP::ADDri; + SAVErr = SP::ADDrr; } - NumBytes = -NumBytes; + NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes); if (NumBytes >= -4096) { - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6) .addReg(SP::O6).addImm(NumBytes); } else { // Emit this the hard way. This clobbers G1 which we always know is @@ -71,7 +67,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Emit G1 = G1 + I6 BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) + BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6) .addReg(SP::O6).addReg(SP::G1); } } @@ -97,12 +93,115 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void SparcFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); const SparcInstrInfo &TII = *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); assert(MBBI->getOpcode() == SP::RETL && "Can only put epilog before 'retl' instruction!"); - BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) - .addReg(SP::G0); + if (!FuncInfo->isLeafProc()) { + BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) + .addReg(SP::G0); + return; + } + MachineFrameInfo *MFI = MF.getFrameInfo(); + + int NumBytes = (int) MFI->getStackSize(); + if (NumBytes == 0) + return; + + NumBytes = SubTarget.getAdjustedFrameSize(NumBytes); + + if (NumBytes < 4096) { + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6) + .addReg(SP::O6).addImm(NumBytes); + } else { + // Emit this the hard way. This clobbers G1 which we always know is + // available here. + unsigned OffHi = (unsigned)NumBytes >> 10U; + BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) + .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); + BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6) + .addReg(SP::O6).addReg(SP::G1); + } +} + +bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // Reserve call frame if there are no variable sized objects on the stack. + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool SparcFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); +} + + +static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) +{ + + for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + return true; +} + +bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const +{ + + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + return !(MFI->hasCalls() // has calls + || MRI.isPhysRegUsed(SP::L0) // Too many registers needed + || MRI.isPhysRegUsed(SP::O6) // %SP is used + || hasFP(MF)); // need %FP +} + +void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Remap %i[0-7] to %o[0-7]. + for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { + if (!MRI.isPhysRegUsed(reg)) + continue; + unsigned mapped_reg = (reg - SP::I0 + SP::O0); + assert(!MRI.isPhysRegUsed(mapped_reg)); + + // Replace I register with O register. + MRI.replaceRegWith(reg, mapped_reg); + + // Mark the reg unused. + MRI.setPhysRegUnused(reg); + } + + assert(verifyLeafProcRegUse(&MRI)); +#ifdef XDEBUG + MF.verify(0, "After LeafProc Remapping"); +#endif +} + +void SparcFrameLowering::processFunctionBeforeCalleeSavedScan + (MachineFunction &MF, RegScavenger *RS) const { + + if (!DisableLeafProc && isLeafProc(MF)) { + SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>(); + MFI->setLeafProc(true); + + remapRegsForLeafProc(MF); + } + } diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index c375662..8eaef59 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -38,7 +38,17 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - bool hasFP(const MachineFunction &MF) const { return false; } + bool hasReservedCallFrame(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const; + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + +private: + // Remap input registers to output registers for leaf procedure. + void remapRegsForLeafProc(MachineFunction &MF) const; + + // Returns true if MF is a leaf procedure. + bool isLeafProc(MachineFunction &MF) const; }; } // End llvm namespace diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a709685..e85cf74 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -67,13 +67,13 @@ private: SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); } bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -88,7 +88,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { Base = Addr.getOperand(0); } @@ -131,12 +131,12 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy()); + R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy()); return true; } SDNode *SparcDAGToDAGISel::Select(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isMachineOpcode()) return NULL; // Already selected. diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 3863e2c..1d765f2 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -40,7 +40,7 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT, { assert (ArgFlags.isSRet()); - //Assign SRet argument + // Assign SRet argument. State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 0, LocVT, LocInfo)); @@ -54,18 +54,18 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, static const uint16_t RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; - //Try to get first reg + // Try to get first reg. if (unsigned Reg = State.AllocateReg(RegList, 6)) { State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { - //Assign whole thing in stack + // Assign whole thing in stack. State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, State.AllocateStack(8,4), LocVT, LocInfo)); return true; } - //Try to get second reg + // Try to get second reg. if (unsigned Reg = State.AllocateReg(RegList, 6)) State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); else @@ -164,7 +164,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const { + SDLoc DL, SelectionDAG &DAG) const { if (Subtarget->is64Bit()) return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG); @@ -175,7 +175,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const { + SDLoc DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // CCValAssign - represent the assignment of the return value to locations. @@ -206,7 +206,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - unsigned RetAddrOffset = 8; //Call Inst + Delay Slot + unsigned RetAddrOffset = 8; // Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 if (MF.getFunction()->hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); @@ -238,7 +238,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const { + SDLoc DL, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -314,7 +314,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, + SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { if (Subtarget->is64Bit()) @@ -332,7 +332,7 @@ LowerFormalArguments_32(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -351,7 +351,7 @@ LowerFormalArguments_32(SDValue Chain, CCValAssign &VA = ArgLocs[i]; if (i == 0 && Ins[i].Flags.isSRet()) { - //Get SRet from [%fp+64] + // Get SRet from [%fp+64]. int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, @@ -410,7 +410,7 @@ LowerFormalArguments_32(SDValue Chain, if (VA.needsCustom()) { assert(VA.getValVT() == MVT::f64); - //If it is double-word aligned, just load. + // If it is double-word aligned, just load. if (Offset % 8 == 0) { int FI = MF.getFrameInfo()->CreateFixedObject(8, Offset, @@ -470,7 +470,7 @@ LowerFormalArguments_32(SDValue Chain, } if (MF.getFunction()->hasStructRetAttr()) { - //Copy the SRet Argument to SRetReturnReg + // Copy the SRet Argument to SRetReturnReg. SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) { @@ -532,7 +532,7 @@ LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, + SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -653,7 +653,7 @@ SDValue SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; + SDLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; @@ -680,7 +680,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - //Create local copies for byval args. + // Create local copies for byval args. SmallVector<SDValue, 8> ByValArgs; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -696,13 +696,14 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, SDValue SizeNode = DAG.getConstant(Size, MVT::i32); Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, - false, //isVolatile, - (Size <= 32), //AlwaysInline if size <= 32 + false, // isVolatile, + (Size <= 32), // AlwaysInline if size <= 32 MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true), + dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -718,7 +719,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - //Use local copy if it is a byval arg. + // Use local copy if it is a byval arg. if (Flags.isByVal()) Arg = ByValArgs[byvalArgIdx++]; @@ -758,7 +759,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; - //if it is double-word aligned, just store. + // if it is double-word aligned, just store. if (Offset % 8 == 0) { SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); @@ -791,7 +792,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, if (NextVA.isRegLoc()) { RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); } else { - //Store the low part in stack. + // Store the low part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset); @@ -886,7 +887,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); // Assign locations to each value returned by this call. @@ -979,7 +980,7 @@ SDValue SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc DL = CLI.DL; + SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; // Analyze operands of the call, assigning locations to each operand. @@ -1004,7 +1005,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer to make room for the arguments. // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls // with more than 6 arguments. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DL); // Collect the set of registers to pass to the function and their values. // This will be emitted as a sequence of CopyToReg nodes glued to the call @@ -1122,7 +1124,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Revert the stack pointer immediately after the call. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), - DAG.getIntPtrConstant(0, true), InGlue); + DAG.getIntPtrConstant(0, true), InGlue, DL); InGlue = Chain.getValue(1); // Now extract the return values. This is more or less the same as @@ -1256,6 +1258,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); + setOperationAction(ISD::BlockAddress, getPointerTy(), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -1300,6 +1303,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); if (Subtarget->is64Bit()) { + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::i64, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); } @@ -1308,6 +1315,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) // on SparcV8 and later. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + if (!Subtarget->isV9()) { + // SparcV8 does not have FNEGD and FABSD. + setOperationAction(ISD::FNEG, MVT::f64, Custom); + setOperationAction(ISD::FABS, MVT::f64, Custom); + } + setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); @@ -1358,7 +1371,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(SP::O6); - if (TM.getSubtarget<SparcSubtarget>().isV9()) + if (Subtarget->isV9()) setOperationAction(ISD::CTPOP, MVT::i32, Legal); setMinFunctionAlignment(2); @@ -1391,11 +1404,12 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to /// be zero. Op is expected to be a target specific node. Used by DAG /// combiner. -void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void SparcTargetLowering::computeMaskedBitsForTargetNode + (const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { APInt KnownZero2, KnownOne2; KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); @@ -1444,7 +1458,7 @@ SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const { if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) return DAG.getTargetGlobalAddress(GA->getGlobal(), - GA->getDebugLoc(), + SDLoc(GA), GA->getValueType(0), GA->getOffset(), TF); @@ -1454,6 +1468,12 @@ SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, CP->getAlignment(), CP->getOffset(), TF); + if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) + return DAG.getTargetBlockAddress(BA->getBlockAddress(), + Op.getValueType(), + 0, + TF); + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), TF); @@ -1466,7 +1486,7 @@ SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF, SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); @@ -1476,7 +1496,7 @@ SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, // Build SDNodes for producing an address from a GlobalAddress, ConstantPool, // or ExternalSymbol SDNode. SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT VT = getPointerTy(); // Handle PIC mode first. @@ -1524,8 +1544,13 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, return makeAddress(Op, DAG); } +SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Convert the fp value to integer in an FP register. assert(Op.getValueType() == MVT::i32); Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0)); @@ -1533,7 +1558,7 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(Op.getOperand(0).getValueType() == MVT::i32); SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); // Convert the int value to FP in an FP register. @@ -1546,7 +1571,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc, SPCC = ~0U; // If this is a br_cc of a "setcc", and if the setcc got lowered into @@ -1556,9 +1581,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { // Get the condition flag. SDValue CompareFlag; if (LHS.getValueType().isInteger()) { - EVT VTs[] = { LHS.getValueType(), MVT::Glue }; - SDValue Ops[2] = { LHS, RHS }; - CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); + CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS); if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); // 32-bit compares use the icc flags, 64-bit uses the xcc flags. Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC; @@ -1577,7 +1600,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc, SPCC = ~0U; // If this is a select_cc of a "setcc", and if the setcc got lowered into @@ -1586,10 +1609,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue CompareFlag; if (LHS.getValueType().isInteger()) { - // subcc returns a value - EVT VTs[] = { LHS.getValueType(), MVT::Glue }; - SDValue Ops[2] = { LHS, RHS }; - CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); + CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS); Opc = LHS.getValueType() == MVT::i32 ? SPISD::SELECT_ICC : SPISD::SELECT_XCC; if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); @@ -1607,9 +1627,12 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo()->setFrameAddressIsTaken(true); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Offset = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), DAG.getRegister(SP::I6, TLI.getPointerTy()), @@ -1626,7 +1649,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); EVT PtrVT = VAListPtr.getValueType(); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV), false, false, false, 0); // Increment the pointer, VAList, to the next vaarg. @@ -1645,7 +1668,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); // Legalize the chain. SDValue Size = Op.getOperand(1); // Legalize the size. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned SPReg = SP::O6; SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32); @@ -1662,7 +1685,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Chain = DAG.getNode(SPISD::FLUSHW, dl, MVT::Other, DAG.getEntryNode()); return Chain; @@ -1673,7 +1696,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned FrameReg = SP::I6; uint64_t depth = Op.getConstantOperandVal(0); @@ -1704,7 +1727,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned RetReg = SP::I7; uint64_t depth = Op.getConstantOperandVal(0); @@ -1713,6 +1736,9 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { if (depth == 0) RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT); else { + // Need frame address to find return address of the caller. + MFI->setFrameAddressIsTaken(true); + // flush first to make sure the windowed registers' values are in stack SDValue Chain = getFLUSHW(Op, DAG); RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT); @@ -1731,15 +1757,48 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return RetAddr; } +static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + + assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!"); + assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS); + + // Lower fneg/fabs on f64 to fneg/fabs on f32. + // fneg f64 => fneg f32:sub_even, fmov f32:sub_odd. + // fabs f64 => fabs f32:sub_even, fmov f32:sub_odd. + + SDValue SrcReg64 = Op.getOperand(0); + SDValue Hi32 = DAG.getTargetExtractSubreg(SP::sub_even, dl, MVT::f32, + SrcReg64); + SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32, + SrcReg64); + + Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32); + + SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, MVT::f64), 0); + DstReg64 = DAG.getTargetInsertSubreg(SP::sub_even, dl, MVT::f64, + DstReg64, Hi32); + DstReg64 = DAG.getTargetInsertSubreg(SP::sub_odd, dl, MVT::f64, + DstReg64, Lo32); + return DstReg64; +} + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + + case ISD::FNEG: + case ISD::FABS: return LowerF64Op(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Sparc."); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index fd706be..7137171 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -78,19 +78,19 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_32(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; virtual SDValue @@ -106,20 +106,21 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; SDValue LowerReturn_32(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const; + SDLoc DL, SelectionDAG &DAG) const; SDValue LowerReturn_64(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc DL, SelectionDAG &DAG) const; + SDLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index 91805f9..47658ee 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -59,10 +59,6 @@ defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>; // preferable to use a constant pool load instead, depending on the // microarchitecture. -// The %g0 register is constant 0. -// This is useful for stx %g0, [...], for example. -def : Pat<(i64 0), (i64 G0)>, Requires<[Is64Bit]>; - // Single-instruction patterns. // The ALU instructions want their simm13 operands as i32 immediates. @@ -164,7 +160,7 @@ def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>; def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>; def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>; -def : Pat<(SPcmpicc i64:$a, i64:$b), (SUBCCrr $a, $b)>; +def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; // Register-immediate instructions. @@ -175,7 +171,7 @@ def : Pat<(xor i64:$a, (i64 simm13:$b)), (XORri $a, (as_i32imm $b))>; def : Pat<(add i64:$a, (i64 simm13:$b)), (ADDri $a, (as_i32imm $b))>; def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>; -def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>; +def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; } // Predicates = [Is64Bit] @@ -243,6 +239,11 @@ def LDXri : F3_2<3, 0b001011, [(set i64:$dst, (load ADDRri:$addr))]>; // Extending loads to i64. +def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (zextloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; +def : Pat<(i64 (extloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; +def : Pat<(i64 (extloadi1 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; + def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>; def : Pat<(i64 (extloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>; @@ -290,6 +291,10 @@ def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), (STHri ADDRri:$addr, $src)>; def : Pat<(truncstorei32 i64:$src, ADDRrr:$addr), (STrr ADDRrr:$addr, $src)>; def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), (STri ADDRri:$addr, $src)>; +// store 0, addr -> store %g0, addr +def : Pat<(store (i64 0), ADDRrr:$dst), (STXrr ADDRrr:$dst, (i64 G0))>; +def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>; + } // Predicates = [Is64Bit] @@ -308,7 +313,7 @@ let Predicates = [Is64Bit] in { let Uses = [ICC] in def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "bp$cc %xcc, $dst", + "b$cc %xcc, $dst", [(SPbrxcc bb:$dst, imm:$cc)]>; // Conditional moves on %xcc. @@ -322,7 +327,17 @@ def MOVXCCri : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cond), "mov$cond %xcc, $i, $rd", [(set i32:$rd, - (SPselecticc simm11:$i, i32:$f, imm:$cond))]>; + (SPselectxcc simm11:$i, i32:$f, imm:$cond))]>; +def FMOVS_XCC : Pseudo<(outs FPRegs:$rd), + (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond), + "fmovs$cond %xcc, $rs2, $rd", + [(set f32:$rd, + (SPselectxcc f32:$rs2, f32:$f, imm:$cond))]>; +def FMOVD_XCC : Pseudo<(outs DFPRegs:$rd), + (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond), + "fmovd$cond %xcc, $rs2, $rd", + [(set f64:$rd, + (SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>; } // Uses, Constraints def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond), @@ -330,4 +345,14 @@ def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond), def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond), (MOVXCCri (as_i32imm $t), $f, imm:$cond)>; +def : Pat<(SPselecticc i64:$t, i64:$f, imm:$cond), + (MOVICCrr $t, $f, imm:$cond)>; +def : Pat<(SPselecticc (i64 simm11:$t), i64:$f, imm:$cond), + (MOVICCri (as_i32imm $t), $f, imm:$cond)>; + +def : Pat<(SPselectfcc i64:$t, i64:$f, imm:$cond), + (MOVFCCrr $t, $f, imm:$cond)>; +def : Pat<(SPselectfcc (i64 simm11:$t), i64:$f, imm:$cond), + (MOVFCCri (as_i32imm $t), $f, imm:$cond)>; + } // Predicates = [Is64Bit] diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index e7fde08..6cdf6bc 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -7,14 +7,15 @@ // //===----------------------------------------------------------------------===// -class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction { +class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> + : Instruction { field bits<32> Inst; let Namespace = "SP"; bits<2> op; let Inst{31-30} = op; // Top two bits are the 'op' field - + dag OutOperandList = outs; dag InOperandList = ins; let AsmString = asmstr; @@ -46,7 +47,7 @@ class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern> let Inst{29-25} = rd; } -class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, +class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern> : F2<outs, ins, asmstr, pattern> { bits<4> cond; bit annul = 0; // currently unused @@ -88,7 +89,7 @@ class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins, let Inst{4-0} = rs2; } -class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins, +class F3_2<bits<2> opVal, bits<6> op3val, dag outs, dag ins, string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> { bits<13> simm13; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 39d7329..626bc40 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), - RI(ST, *this), Subtarget(ST) { + RI(ST), Subtarget(ST) { } /// isLoadFromStackSlot - If the specified machine instruction is a direct @@ -40,6 +40,7 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == SP::LDri || + MI->getOpcode() == SP::LDXri || MI->getOpcode() == SP::LDFri || MI->getOpcode() == SP::LDDFri) { if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && @@ -59,6 +60,7 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == SP::STri || + MI->getOpcode() == SP::STXri || MI->getOpcode() == SP::STFri || MI->getOpcode() == SP::STDFri) { if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && @@ -139,15 +141,15 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (I->isDebugValue()) continue; - //When we see a non-terminator, we are done + // When we see a non-terminator, we are done. if (!isUnpredicatedTerminator(I)) break; - //Terminator is not a branch + // Terminator is not a branch. if (!I->isBranch()) return true; - //Handle Unconditional branches + // Handle Unconditional branches. if (I->getOpcode() == SP::BA) { UnCondBrIter = I; @@ -176,7 +178,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned Opcode = I->getOpcode(); if (Opcode != SP::BCOND && Opcode != SP::FBCOND) - return true; //Unknown Opcode + return true; // Unknown Opcode. SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm(); @@ -185,7 +187,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (AllowModify && UnCondBrIter != MBB.end() && MBB.isLayoutSuccessor(TargetBB)) { - //Transform the code + // Transform the code // // brCC L1 // ba L2 @@ -219,8 +221,8 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } - //FIXME: Handle subsequent conditional branches - //For now, we can't handle multiple conditional branches + // FIXME: Handle subsequent conditional branches. + // For now, we can't handle multiple conditional branches. return true; } return false; @@ -241,7 +243,7 @@ SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, return 1; } - //Conditional branch + // Conditional branch unsigned CC = Cond[0].getImm(); if (IsIntegerCC(CC)) @@ -287,10 +289,28 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) - BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else + else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { + if (Subtarget.isV9()) { + BuildMI(MBB, I, DL, get(SP::FMOVD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + // Use two FMOVS instructions. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstr *MovMI = 0; + unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd}; + for (unsigned i = 0; i != 2; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); + unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); + assert(Dst && Src && "Bad sub-register"); + + MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src); + } + // Add implicit super-register defs and kills to the last MovMI. + MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI); + } + } else llvm_unreachable("Impossible reg-to-reg copy"); } @@ -303,7 +323,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". - if (RC == &SP::IntRegsRegClass) + if (RC == &SP::I64RegsRegClass) + BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)); + else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); else if (RC == &SP::FPRegsRegClass) @@ -324,7 +347,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == &SP::IntRegsRegClass) + if (RC == &SP::I64RegsRegClass) + BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0); + else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 204f698..a0a0ffd8 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -53,7 +53,7 @@ public: /// any side effects other than loading from the stack slot. virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -86,7 +86,7 @@ public: MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -98,7 +98,7 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - + unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index baefb06..d4cac4d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -89,9 +89,11 @@ def calltarget : Operand<i32>; let PrintMethod = "printCCOperand" in def CCOp : Operand<i32>; -def SDTSPcmpfcc : +def SDTSPcmpicc : +SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; +def SDTSPcmpfcc : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; -def SDTSPbrcc : +def SDTSPbrcc : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; def SDTSPselectcc : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>; @@ -100,7 +102,7 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; def SDTSPITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; -def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>; +def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>; def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>; def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPbrxcc : SDNode<"SPISD::BRXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; @@ -186,7 +188,7 @@ def FCC_O : FCC_VAL<29>; // Ordered /// F3_12 multiclass - Define a normal F3_1/F3_2 pattern in one shot. multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> { - def rr : F3_1<2, Op3Val, + def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; @@ -199,7 +201,7 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> { /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no /// pattern. multiclass F3_12np<string OpcStr, bits<6> Op3Val> { - def rr : F3_1<2, Op3Val, + def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), []>; def ri : F3_2<2, Op3Val, @@ -243,24 +245,11 @@ let hasSideEffects = 1, mayStore = 1 in { def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), "unimp $val", []>; -// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the -// fpmover pass. -let Predicates = [HasNoV9] in { // Only emit these in V8 mode. - def FpMOVD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpMOVD $src, $dst", []>; - def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpNEGD $src, $dst", - [(set f64:$dst, (fneg f64:$src))]>; - def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), - "!FpABSD $src, $dst", - [(set f64:$dst, (fabs f64:$src))]>; -} - // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. This has to handle all // permutations of selection between i32/f32/f64 on ICC and FCC. - // Expanded after instruction selection. -let Uses = [ICC], usesCustomInserter = 1 in { +// Expanded after instruction selection. +let Uses = [ICC], usesCustomInserter = 1 in { def SELECT_CC_Int_ICC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", @@ -463,9 +452,9 @@ defm ADD : F3_12<"add", 0b000000, add>; def LEA_ADDri : F3_2<2, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "add ${addr:arith}, $dst", - [(set i32:$dst, ADDRri:$addr)]>; + [(set iPTR:$dst, ADDRri:$addr)]>; -let Defs = [ICC] in +let Defs = [ICC] in defm ADDCC : F3_12<"addcc", 0b010000, addc>; let Uses = [ICC] in @@ -473,14 +462,24 @@ let Uses = [ICC] in // Section B.15 - Subtract Instructions, p. 110 defm SUB : F3_12 <"sub" , 0b000100, sub>; -let Uses = [ICC] in +let Uses = [ICC] in defm SUBX : F3_12 <"subx" , 0b001100, sube>; -let Defs = [ICC] in - defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; +let Defs = [ICC] in { + defm SUBCC : F3_12 <"subcc", 0b010100, subc>; + + def CMPrr : F3_1<2, 0b010100, + (outs), (ins IntRegs:$b, IntRegs:$c), + "cmp $b, $c", + [(SPcmpicc i32:$b, i32:$c)]>; + def CMPri : F3_1<2, 0b010100, + (outs), (ins IntRegs:$b, i32imm:$c), + "cmp $b, $c", + [(SPcmpicc i32:$b, (i32 simm13:$c))]>; +} let Uses = [ICC], Defs = [ICC] in - def SUBXCCrr: F3_1<2, 0b011100, + def SUBXCCrr: F3_1<2, 0b011100, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "subxcc $b, $c, $dst", []>; @@ -516,6 +515,20 @@ let isBarrier = 1 in "ba $dst", [(br bb:$dst)]>; +// Indirect branch instructions. +let isTerminator = 1, isBarrier = 1, + hasDelaySlot = 1, isBranch =1, + isIndirectBranch = 1 in { + def BINDrr : F3_1<2, 0b111000, + (outs), (ins MEMrr:$ptr), + "jmp $ptr", + [(brind ADDRrr:$ptr)]>; + def BINDri : F3_2<2, 0b111000, + (outs), (ins MEMri:$ptr), + "jmp $ptr", + [(brind ADDRri:$ptr)]>; +} + // FIXME: the encoding for the JIT should look at the condition field. let Uses = [ICC] in def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), @@ -553,7 +566,7 @@ let Uses = [O6], let op = 1; let Inst{29-0} = disp; } - + // indirect calls def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr, variable_ops), @@ -566,7 +579,7 @@ let Uses = [O6], } // Section B.28 - Read State Register Instructions -let Uses = [Y] in +let Uses = [Y] in def RDY : F3_1<2, 0b101000, (outs IntRegs:$dst), (ins), "rd %y, $dst", []>; @@ -585,7 +598,7 @@ def FITOS : F3_3<2, 0b110100, 0b011000100, (outs FPRegs:$dst), (ins FPRegs:$src), "fitos $src, $dst", [(set FPRegs:$dst, (SPitof FPRegs:$src))]>; -def FITOD : F3_3<2, 0b110100, 0b011001000, +def FITOD : F3_3<2, 0b110100, 0b011001000, (outs DFPRegs:$dst), (ins FPRegs:$src), "fitod $src, $dst", [(set DFPRegs:$dst, (SPitof FPRegs:$src))]>; @@ -601,7 +614,7 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010, [(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>; // Convert between Floating-point Formats Instructions, p. 143 -def FSTOD : F3_3<2, 0b110100, 0b011001001, +def FSTOD : F3_3<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", [(set f64:$dst, (fextend f32:$src))]>; @@ -614,22 +627,22 @@ def FDTOS : F3_3<2, 0b110100, 0b011000110, def FMOVS : F3_3<2, 0b110100, 0b000000001, (outs FPRegs:$dst), (ins FPRegs:$src), "fmovs $src, $dst", []>; -def FNEGS : F3_3<2, 0b110100, 0b000000101, +def FNEGS : F3_3<2, 0b110100, 0b000000101, (outs FPRegs:$dst), (ins FPRegs:$src), "fnegs $src, $dst", [(set f32:$dst, (fneg f32:$src))]>; -def FABSS : F3_3<2, 0b110100, 0b000001001, +def FABSS : F3_3<2, 0b110100, 0b000001001, (outs FPRegs:$dst), (ins FPRegs:$src), "fabss $src, $dst", [(set f32:$dst, (fabs f32:$src))]>; // Floating-point Square Root Instructions, p.145 -def FSQRTS : F3_3<2, 0b110100, 0b000101001, +def FSQRTS : F3_3<2, 0b110100, 0b000101001, (outs FPRegs:$dst), (ins FPRegs:$src), "fsqrts $src, $dst", [(set f32:$dst, (fsqrt f32:$src))]>; -def FSQRTD : F3_3<2, 0b110100, 0b000101010, +def FSQRTD : F3_3<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", [(set f64:$dst, (fsqrt f64:$src))]>; @@ -698,52 +711,51 @@ let Defs = [FCC] in { //===----------------------------------------------------------------------===// // V9 Conditional Moves. -let Predicates = [HasV9], Constraints = "$T = $dst" in { +let Predicates = [HasV9], Constraints = "$f = $rd" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. let Uses = [ICC] in { def MOVICCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), + "mov$cc %icc, $rs2, $rd", + [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>; def MOVICCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), + "mov$cc %icc, $i, $rd", + [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>; } let Uses = [FCC] in { def MOVFCCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc), + "mov$cc %fcc0, $rs2, $rd", + [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cc))]>; def MOVFCCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>; + : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc), + "mov$cc %fcc0, $i, $rd", + [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>; } let Uses = [ICC] in { def FMOVS_ICC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %icc, $F, $dst", - [(set f32:$dst, - (SPselecticc f32:$F, f32:$T, imm:$cc))]>; + : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), + "fmovs$cc %icc, $rs2, $rd", + [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>; def FMOVD_ICC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %icc, $F, $dst", - [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>; + : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), + "fmovd$cc %icc, $rs2, $rd", + [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>; } let Uses = [FCC] in { def FMOVS_FCC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %fcc0, $F, $dst", - [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>; + : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc), + "fmovs$cc %fcc0, $rs2, $rd", + [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>; def FMOVD_FCC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %fcc0, $F, $dst", - [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>; + : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc), + "fmovd$cc %fcc0, $rs2, $rd", + [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>; } } @@ -753,11 +765,11 @@ let Predicates = [HasV9] in { def FMOVD : F3_3<2, 0b110100, 0b000000010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fmovd $src, $dst", []>; - def FNEGD : F3_3<2, 0b110100, 0b000000110, + def FNEGD : F3_3<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", [(set f64:$dst, (fneg f64:$src))]>; - def FABSD : F3_3<2, 0b110100, 0b000001010, + def FABSD : F3_3<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", [(set f64:$dst, (fabs f64:$src))]>; @@ -765,7 +777,7 @@ let Predicates = [HasV9] in { // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear // the top 32-bits before using it. To do this clearing, we use a SLLri X,0. -def POPCrr : F3_1<2, 0b101110, +def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$dst), (ins IntRegs:$src), "popc $src, $dst", []>, Requires<[HasV9]>; def : Pat<(ctpop i32:$src), @@ -782,11 +794,6 @@ def : Pat<(i32 simm13:$val), def : Pat<(i32 imm:$val), (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; -// subc -def : Pat<(subc i32:$b, i32:$c), - (SUBCCrr $b, $c)>; -def : Pat<(subc i32:$b, simm13:$val), - (SUBCCri $b, imm:$val)>; // Global addresses, constant pool entries def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; @@ -794,11 +801,17 @@ def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>; def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>; +// Blockaddress +def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>; +def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>; + // Add reg, lo. This is used when taking the addr of a global/constpool entry. def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>; def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>; +def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)), + (ADDri $r, tblockaddress:$in)>; -// Calls: +// Calls: def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; def : Pat<(call texternalsym:$dst), @@ -816,4 +829,8 @@ def : Pat<(i32 (extloadi16 ADDRri:$src)), (LDUHri ADDRri:$src)>; def : Pat<(i32 (zextloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; def : Pat<(i32 (zextloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; +// store 0, addr -> store %g0, addr +def : Pat<(store (i32 0), ADDRrr:$dst), (STrr ADDRrr:$dst, (i32 G0))>; +def : Pat<(store (i32 0), ADDRri:$dst), (STri ADDRri:$dst, (i32 G0))>; + include "SparcInstr64Bit.td" diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 90c27a4..3783c16 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -28,11 +28,16 @@ namespace llvm { /// SRetReturnReg - Holds the virtual register into which the sret /// argument is passed. unsigned SRetReturnReg; + + /// IsLeafProc - True if the function is a leaf procedure. + bool IsLeafProc; public: SparcMachineFunctionInfo() - : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} explicit SparcMachineFunctionInfo(MachineFunction &MF) - : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } @@ -42,6 +47,9 @@ namespace llvm { unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } }; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 3af4c61..dc97f06 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -13,6 +13,7 @@ #include "SparcRegisterInfo.h" #include "Sparc.h" +#include "SparcMachineFunctionInfo.h" #include "SparcSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -20,6 +21,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -28,9 +30,12 @@ using namespace llvm; -SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, - const TargetInstrInfo &tii) - : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) { +static cl::opt<bool> +ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), + cl::desc("Reserve application registers (%g2-%g4)")); + +SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st) + : SparcGenRegisterInfo(SP::I7), Subtarget(st) { } const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -43,14 +48,21 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // FIXME: G1 reserved for now for large imm generation by frame code. Reserved.set(SP::G1); - Reserved.set(SP::G2); - Reserved.set(SP::G3); - Reserved.set(SP::G4); + + // G1-G4 can be used in applications. + if (ReserveAppRegisters) { + Reserved.set(SP::G2); + Reserved.set(SP::G3); + Reserved.set(SP::G4); + } + // G5 is not reserved in 64 bit mode. + if (!Subtarget.is64Bit()) + Reserved.set(SP::G5); + Reserved.set(SP::O6); Reserved.set(SP::I6); Reserved.set(SP::I7); Reserved.set(SP::G0); - Reserved.set(SP::G5); Reserved.set(SP::G6); Reserved.set(SP::G7); return Reserved; @@ -77,21 +89,30 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MI.getOperand(FIOperandNum + 1).getImm() + Subtarget.getStackPointerBias(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + unsigned FramePtr = SP::I6; + if (FuncInfo->isLeafProc()) { + // Use %sp and adjust offset if needed. + FramePtr = SP::O6; + int stackSize = MF.getFrameInfo()->getStackSize(); + Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ; + } // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly // encode it. - MI.getOperand(FIOperandNum).ChangeToRegister(SP::I6, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } else { - // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to + // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to // scavenge a register here instead of reserving G1 all of the time. + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); unsigned OffHi = (unsigned)Offset >> 10U; BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); // Emit G1 = G1 + I6 BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) - .addReg(SP::I6); + .addReg(FramePtr); // Insert: G1+%lo(offset) into the user. MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1)); diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index f91df53..6b77d4e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -27,9 +27,8 @@ class Type; struct SparcRegisterInfo : public SparcGenRegisterInfo { SparcSubtarget &Subtarget; - const TargetInstrInfo &TII; - SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii); + SparcRegisterInfo(SparcSubtarget &st); /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 497e7c5..a59c442 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Declarations that describe the Sparc register file +// Declarations that describe the Sparc register file //===----------------------------------------------------------------------===// class SparcReg<string n> : Register<n> { @@ -21,8 +21,8 @@ class SparcCtrlReg<string n>: Register<n> { } let Namespace = "SP" in { -def sub_even : SubRegIndex; -def sub_odd : SubRegIndex; +def sub_even : SubRegIndex<32>; +def sub_odd : SubRegIndex<32, 32>; } // Registers are identified with 5-bit ID numbers. @@ -52,68 +52,68 @@ def Y : SparcCtrlReg<"Y">; // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; -def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; +def G2 : Ri< 2, "G2">, DwarfRegNum<[2]>; def G3 : Ri< 3, "G3">, DwarfRegNum<[3]>; def G4 : Ri< 4, "G4">, DwarfRegNum<[4]>; -def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; +def G5 : Ri< 5, "G5">, DwarfRegNum<[5]>; def G6 : Ri< 6, "G6">, DwarfRegNum<[6]>; def G7 : Ri< 7, "G7">, DwarfRegNum<[7]>; def O0 : Ri< 8, "O0">, DwarfRegNum<[8]>; def O1 : Ri< 9, "O1">, DwarfRegNum<[9]>; -def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; +def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; def O3 : Ri<11, "O3">, DwarfRegNum<[11]>; def O4 : Ri<12, "O4">, DwarfRegNum<[12]>; -def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; +def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; def O6 : Ri<14, "SP">, DwarfRegNum<[14]>; def O7 : Ri<15, "O7">, DwarfRegNum<[15]>; def L0 : Ri<16, "L0">, DwarfRegNum<[16]>; def L1 : Ri<17, "L1">, DwarfRegNum<[17]>; -def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; +def L2 : Ri<18, "L2">, DwarfRegNum<[18]>; def L3 : Ri<19, "L3">, DwarfRegNum<[19]>; def L4 : Ri<20, "L4">, DwarfRegNum<[20]>; -def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; +def L5 : Ri<21, "L5">, DwarfRegNum<[21]>; def L6 : Ri<22, "L6">, DwarfRegNum<[22]>; def L7 : Ri<23, "L7">, DwarfRegNum<[23]>; def I0 : Ri<24, "I0">, DwarfRegNum<[24]>; def I1 : Ri<25, "I1">, DwarfRegNum<[25]>; -def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; +def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; def I3 : Ri<27, "I3">, DwarfRegNum<[27]>; def I4 : Ri<28, "I4">, DwarfRegNum<[28]>; -def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; +def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; def I6 : Ri<30, "FP">, DwarfRegNum<[30]>; def I7 : Ri<31, "I7">, DwarfRegNum<[31]>; // Floating-point registers def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; -def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>; +def F2 : Rf< 2, "F2">, DwarfRegNum<[34]>; def F3 : Rf< 3, "F3">, DwarfRegNum<[35]>; def F4 : Rf< 4, "F4">, DwarfRegNum<[36]>; -def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>; +def F5 : Rf< 5, "F5">, DwarfRegNum<[37]>; def F6 : Rf< 6, "F6">, DwarfRegNum<[38]>; def F7 : Rf< 7, "F7">, DwarfRegNum<[39]>; -def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>; +def F8 : Rf< 8, "F8">, DwarfRegNum<[40]>; def F9 : Rf< 9, "F9">, DwarfRegNum<[41]>; def F10 : Rf<10, "F10">, DwarfRegNum<[42]>; -def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; +def F11 : Rf<11, "F11">, DwarfRegNum<[43]>; def F12 : Rf<12, "F12">, DwarfRegNum<[44]>; def F13 : Rf<13, "F13">, DwarfRegNum<[45]>; -def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; +def F14 : Rf<14, "F14">, DwarfRegNum<[46]>; def F15 : Rf<15, "F15">, DwarfRegNum<[47]>; def F16 : Rf<16, "F16">, DwarfRegNum<[48]>; -def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; +def F17 : Rf<17, "F17">, DwarfRegNum<[49]>; def F18 : Rf<18, "F18">, DwarfRegNum<[50]>; def F19 : Rf<19, "F19">, DwarfRegNum<[51]>; -def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; +def F20 : Rf<20, "F20">, DwarfRegNum<[52]>; def F21 : Rf<21, "F21">, DwarfRegNum<[53]>; def F22 : Rf<22, "F22">, DwarfRegNum<[54]>; def F23 : Rf<23, "F23">, DwarfRegNum<[55]>; def F24 : Rf<24, "F24">, DwarfRegNum<[56]>; def F25 : Rf<25, "F25">, DwarfRegNum<[57]>; -def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; +def F26 : Rf<26, "F26">, DwarfRegNum<[58]>; def F27 : Rf<27, "F27">, DwarfRegNum<[59]>; def F28 : Rf<28, "F28">, DwarfRegNum<[60]>; -def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; +def F29 : Rf<29, "F29">, DwarfRegNum<[61]>; def F30 : Rf<30, "F30">, DwarfRegNum<[62]>; def F31 : Rf<31, "F31">, DwarfRegNum<[63]>; @@ -144,19 +144,10 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>; // register class for that. The i64 type is included here to allow i64 patterns // using the integer instructions. def IntRegs : RegisterClass<"SP", [i32, i64], 32, - (add L0, L1, L2, L3, L4, L5, L6, - L7, I0, I1, I2, I3, I4, I5, - O0, O1, O2, O3, O4, O5, O7, - G1, - // Non-allocatable regs: - G2, G3, G4, // FIXME: OK for use only in - // applications, not libraries. - O6, // stack ptr - I6, // frame ptr - I7, // return address - G0, // constant zero - G5, G6, G7 // reserved for kernel - )>; + (add (sequence "I%u", 0, 7), + (sequence "G%u", 0, 7), + (sequence "L%u", 0, 7), + (sequence "O%u", 0, 7))>; // Register class for 64-bit mode, with a 64-bit spill slot size. // These are the same as the 32-bit registers, so TableGen will consider this diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index e5b2aeb..f9ce098 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -13,6 +13,7 @@ #include "SparcSubtarget.h" #include "Sparc.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -30,7 +31,7 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, V8DeprecatedInsts(false), IsVIS(false), Is64Bit(is64Bit) { - + // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty()) { @@ -44,3 +45,30 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, // Parse features string. ParseSubtargetFeatures(CPUName, FS); } + + +int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { + + if (is64Bit()) { + // All 64-bit stack frames must be 16-byte aligned, and must reserve space + // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128. + frameSize += 128; + // Frames with calls must also reserve space for 6 outgoing arguments + // whether they are used or not. LowerCall_64 takes care of that. + assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned"); + } else { + // Emit the correct save instruction based on the number of bytes in + // the frame. Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + frameSize += 92; + + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. + frameSize = RoundUpToAlignment(frameSize, 8); + } + return frameSize; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index b94dd11..2bf599d 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -29,7 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool V8DeprecatedInsts; bool IsVIS; bool Is64Bit; - + public: SparcSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64bit); @@ -37,11 +37,11 @@ public: bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; } - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - + bool is64Bit() const { return Is64Bit; } std::string getDataLayout() const { const char *p; @@ -58,6 +58,12 @@ public: int64_t getStackPointerBias() const { return is64Bit() ? 2047 : 0; } + + /// Given a actual stack size as determined by FrameInfo, this function + /// returns adjusted framesize which includes space for register window + /// spills and arguments. + int getAdjustedFrameSize(int stackSize) const; + }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 60bceb7..a7355f4 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -37,6 +37,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { @@ -68,7 +69,6 @@ bool SparcPassConfig::addInstSelector() { /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. bool SparcPassConfig::addPreEmitPass(){ - addPass(createSparcFPMoverPass(getSparcTargetMachine())); addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } |