diff options
Diffstat (limited to 'lib/Target')
180 files changed, 11592 insertions, 4726 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6fe7c2c..8e537d8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -643,6 +643,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (DestRC == ARM::tGPRRegisterClass) + DestRC = ARM::GPRRegisterClass; + if (SrcRC == ARM::tGPRRegisterClass) + SrcRC = ARM::GPRRegisterClass; + if (DestRC != SrcRC) { if (DestRC->getSize() != SrcRC->getSize()) return false; @@ -697,6 +704,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) @@ -745,6 +757,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); @@ -1020,9 +1037,8 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { return MI; } -bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const { +bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const { int Opcode = MI0->getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || @@ -1051,7 +1067,7 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return ACPV0->hasSameValue(ACPV1); } - return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0d9d4a7..0194231 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -289,8 +289,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; - virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, - const MachineRegisterInfo *MRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const; }; static inline @@ -332,7 +332,7 @@ bool isJumpTableBranchOpcode(int Opc) { static inline bool isIndirectBranchOpcode(int Opc) { - return Opc == ARM::BRIND || Opc == ARM::tBRIND; + return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cb0bd1d..577c363 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -513,7 +513,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { } /// estimateStackSize - Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { +static unsigned estimateStackSize(MachineFunction &MF) { const MachineFrameInfo *FFI = MF.getFrameInfo(); int Offset = 0; for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { @@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - if (RealignStack) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->calculateMaxStackAlignment(); - } - // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. // FIXME: It will be better just to find spare register here. @@ -679,8 +671,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } + // If any of the stack slot references may be out of range of an immediate + // offset, make sure a register (or a spill slot) is available for the + // register scavenger. Note that if we're indexing off the frame pointer, the + // effective stack size is 4 bytes larger since the FP points to the stack + // slot of the previous FP. + bool BigStack = RS && + estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + bool ExtraCSSpill = false; - if (!CanEliminateFrame || cannotEliminateFrame(MF)) { + if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. @@ -735,51 +735,43 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (RS && !ExtraCSSpill) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - // If any of the stack slot references may be out of range of an - // immediate offset, make sure a register (or a spill slot) is - // available for the register scavenger. Note that if we're indexing - // off the frame pointer, the effective stack size is 4 bytes larger - // since the FP points to the stack slot of the previous FP. - if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0) - >= estimateRSStackSizeLimit(MF)) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it! - unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 2> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); + if (BigStack && !ExtraCSSpill) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector<unsigned, 2> Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + // For non-Thumb1 functions, also check for hi-reg CS registers + if (!AFI->isThumb1OnlyFunction()) { + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); if (!isReservedReg(MF, Reg)) { Extras.push_back(Reg); NumExtras--; } } - // For non-Thumb1 functions, also check for hi-reg CS registers - if (!AFI->isThumb1OnlyFunction()) { - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else if (!AFI->isThumb1OnlyFunction()) { - // note: Thumb1 functions spill to R12, not the stack. - // Reserve a slot closest to SP or frame pointer. - const TargetRegisterClass *RC = ARM::GPRRegisterClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + AFI->setCSRegisterIsSpilled(Extras[i]); } + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. Reserve a slot + // closest to SP or frame pointer. + const TargetRegisterClass *RC = ARM::GPRRegisterClass; + MachineFrameInfo *MFI = MF.getFrameInfo(); + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } } } @@ -1093,6 +1085,15 @@ hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } +// canSimplifyCallFramePseudos - If there is a reserved call frame, the +// call frame pseudos can be simplified. Unlike most targets, having a FP +// is not sufficient here since we still may reference some objects via SP +// even when FP is available in Thumb2 mode. +bool ARMBaseRegisterInfo:: +canSimplifyCallFramePseudos(MachineFunction &MF) const { + return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -1127,13 +1128,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not suppor Thumb1!"); + "This eliminateCallFramePseudoInstr does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); - // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN? + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. unsigned PredReg = Old->getOperand(2).getReg(); @@ -1157,7 +1159,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -1168,12 +1169,12 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; unsigned FrameReg; - Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); + int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); if (FrameReg != ARM::SP) SPAdj = 0; + Offset += SPAdj; // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; @@ -1264,7 +1265,7 @@ emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitPrologue does not suppor Thumb1!"); + "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); @@ -1349,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + if (STI.isTargetDarwin() || hasFP(MF)) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -1425,7 +1428,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitEpilogue does not suppor Thumb1!"); + "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 33ba21d..64f6ff1 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -138,6 +138,7 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; virtual bool hasReservedCallFrame(MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index bd703f4..21c6cb3 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -86,6 +86,7 @@ namespace { void emitWordLE(unsigned Binary); void emitDWordLE(uint64_t Binary); void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); void emitMOVi2piecesInstruction(const MachineInstr &MI); void emitLEApcrelJTInstruction(const MachineInstr &MI); void emitPseudoMoveInstruction(const MachineInstr &MI); @@ -143,6 +144,15 @@ namespace { return getMachineOpValue(MI, MI.getOperand(OpIdx)); } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the + /// machine operand requires relocation, record the relocation and return zero. + unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, + unsigned Reloc); + unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, + unsigned Reloc) { + return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc); + } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// unsigned getShiftOp(unsigned Imm) const ; @@ -214,6 +224,54 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { return 0; } +/// getMovi32Value - Return binary encoding of operand for movw/movt. If the +/// machine operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, + const MachineOperand &MO, + unsigned Reloc) { + assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) + && "Relocation to this function should be for movt or movw"); + switch(MO.getType()) { + case MachineOperand::MO_Register: + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + return static_cast<unsigned>(MO.getImm()); + break; + + case MachineOperand::MO_FPImmediate: + return static_cast<unsigned>( + MO.getFPImm()->getValueAPF().bitcastToAPInt().getLimitedValue()); + break; + + case MachineOperand::MO_MachineBasicBlock: + emitMachineBasicBlock(MO.getMBB(), Reloc); + break; + + case MachineOperand::MO_ConstantPoolIndex: + emitConstPoolAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_JumpTableIndex: + emitJumpTableAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_ExternalSymbol: + emitExternalSymbolAddress(MO.getSymbolName(), Reloc); + break; + + case MachineOperand::MO_GlobalAddress: + emitGlobalAddress(MO.getGlobal(), Reloc, true, false); + break; + + default: + llvm_unreachable("Unsupported immediate operand type for movw/movt"); + break; + } + return 0; +} + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, @@ -433,6 +491,41 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } } +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Emit the 'mov' instruction. + unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm. + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + emitWordLE(Binary); + + unsigned Hi16 = (getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16) & 0xFFFF; + // Emit the 'mov' instruction. + Binary = 0x34 << 20; // movt: Insts[27-20] = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); @@ -552,7 +645,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - // FIXME: Add support for MOVimm32. case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -599,6 +691,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscLoadStoreInstruction(MI, ARM::PC); break; } + + case ARM::MOVi32imm: + emitMOVi32immInstruction(MI); + break; + case ARM::MOVi2pieces: // Two instructions to materialize a constant. emitMOVi2piecesInstruction(MI); @@ -1138,7 +1235,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; - if (TID.Opcode == ARM::BX_RET) + if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) // The return register is LR. Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); else diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a458269..013e00a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -65,7 +65,7 @@ public: } SDNode *Select(SDNode *N); - virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, @@ -201,11 +201,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } -void ARMDAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 614e684..6a2c6bb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::SELECT_CC); } computeRegisterProperties(); @@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::FMAX: return "ARMISD::FMAX"; + case ARMISD::FMIN: return "ARMISD::FMIN"; } } @@ -863,7 +866,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -920,7 +924,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); - SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -969,8 +973,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); @@ -983,8 +985,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); @@ -1031,7 +1031,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1052,7 +1053,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1238,7 +1240,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1261,7 +1264,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1278,8 +1282,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1308,21 +1311,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -1358,13 +1364,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1376,7 +1384,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } } } @@ -1403,7 +1412,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1413,7 +1423,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } @@ -1434,7 +1445,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1467,7 +1479,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1515,7 +1528,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue @@ -1592,7 +1606,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1707,7 +1722,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -1745,7 +1761,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1939,13 +1956,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, + false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -1993,7 +2011,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { ? ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -2038,7 +2057,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2047,9 +2066,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); @@ -2075,7 +2094,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -2097,7 +2116,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -3835,23 +3854,106 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match f32 max/min patterns to use NEON vmax/vmin instructions. +static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + // If the target supports NEON, try to use vmax/vmin instructions for f32 + // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is + // a NaN; only do the transformation when it matches that behavior. + + // For now only do this when using NEON for FP operations; if using VFP, it + // is not obvious that the benefit outweighs the cost of switching to the + // NEON pipeline. + if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || + N->getValueType(0) != MVT::f32) + return SDValue(); + + SDValue CondLHS = N->getOperand(0); + SDValue CondRHS = N->getOperand(1); + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + unsigned Opcode = 0; + bool IsReversed; + if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { + IsReversed = false; // x CC y ? x : y + } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { + IsReversed = true ; // x CC y ? y : x + } else { + return SDValue(); + } + + bool IsUnordered; + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin + // will return -0, so vmin can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; + break; + + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax + // will return +0, so vmax can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; + break; + } + + if (!Opcode) + return SDValue(); + return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADD: return PerformADDCombine(N, DCI); - case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3c5df45..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -131,7 +131,11 @@ namespace llvm { VREV16, // reverse elements within 16-bit halfwords VZIP, // zip (interleave) VUZP, // unzip (deinterleave) - VTRN // transpose + VTRN, // transpose + + // Floating-point max and min: + FMAX, + FMIN }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 169eeed..76595fa 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; +def MiscFrm : Format<29>; +def ThumbMiscFrm : Format<30>; + // Misc flags. // the instruction has a Rn register operand. @@ -705,6 +708,20 @@ class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} + // Pre-indexed stores class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, @@ -720,6 +737,19 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} // Post-indexed loads class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, @@ -731,7 +761,7 @@ class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -744,7 +774,7 @@ class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -757,7 +787,20 @@ class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -772,7 +815,20 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -1147,6 +1203,19 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, let Inst{8} = 1; // The W bit. } +// Helper class for disassembly only +// A6.3.16 & A6.3.17 +// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions. +class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b011; + let Inst{23} = long; + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; +} + // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb1Only, HasV5T]; @@ -1324,6 +1393,15 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, let Inst{4} = 0; } +// VFP conversion between floating-point and fixed-point +class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + // size (fixed-point number): sx == 0 ? 16 : 32 + let Inst{7} = op5; // sx +} + // VFP conversion instructions, if no NEON class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 852c74e..3812aba 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -113,6 +113,8 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; @@ -130,8 +132,6 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; -def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">; -def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -176,7 +176,7 @@ def imm16_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; }]>; -def so_imm_neg : +def so_imm_neg : PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1; }], so_imm_neg_XFORM>; @@ -194,7 +194,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, - PatLeaf<(imm), [{ + PatLeaf<(imm), [{ uint32_t v = (uint32_t)N->getZExtValue(); if (v == 0xffffffff) return 0; @@ -227,7 +227,7 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 predicate - True if the 32-bit immediate is in the range +/// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. def imm0_65535 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 65536; @@ -236,6 +236,21 @@ def imm0_65535 : PatLeaf<(i32 imm), [{ class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; +/// adde and sube predicates - True based on whether the carry flag output +/// will be needed or not. +def adde_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def sube_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def adde_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; +def sube_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; + //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -501,6 +516,22 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { } } +multiclass AI_unary_rrot_np<bits<8> opcod, string opc> { + def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src), + IIC_iUNAr, opc, "\t$dst, $src", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + let Inst{19-16} = 0b1111; + } + def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot), + IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } +} + /// AI_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { @@ -510,13 +541,29 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { Requires<[IsARM, HasV6]> { let Inst{11-10} = 0b00; } - def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>, Requires<[IsARM, HasV6]>; } +// For disassembly only. +multiclass AI_bin_rrot_np<bits<8> opcod, string opc> { + def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + IIC_iALUr, opc, "\t$dst, $LHS, $RHS", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + } + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), + IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]>; +} + /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. let Uses = [CPSR] in { multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, @@ -524,13 +571,13 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 1; } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let isCommutable = Commutable; let Inst{11-4} = 0b00000000; let Inst{25} = 0; @@ -538,7 +585,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -549,16 +596,14 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; @@ -566,8 +611,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -593,18 +637,153 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, "${instid:label} ${cpidx:cpentry}", []>; -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, "@ ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; -def ADJCALLSTACKDOWN : +def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, "@ ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000011; +} + +def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", + "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01101000; + let Inst{7-4} = 0b1011; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 0; + let Inst{5} = 0; +} + +// Preload signals the memory system of possible future data/instruction access. +// These are for disassembly only. +multiclass APreLoad<bit data, bit read, string opc> { + + def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary, + !strconcat(opc, "\t[$base, $imm]"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 0; // 0 for immediate form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + } + + def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary, + !strconcat(opc, "\t$addr"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 1; // 1 for register form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{4} = 0; + } +} + +defm PLD : APreLoad<1, 1, "pld">; +defm PLDW : APreLoad<1, 0, "pldw">; +defm PLI : APreLoad<0, 1, "pli">; + +def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 1; + let Inst{7-4} = 0b0000; +} + +def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 0; + let Inst{7-4} = 0b0000; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-4} = 0b1111; +} + +// A5.4 Permanently UNDEFINED instructions. +def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{7-5} = 0b111; + let Inst{4} = 0b1; +} + // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -665,7 +844,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), "(${label}_${id}-(", "${:private}PCRELL${:uid}+8))\n"), !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), + "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), []> { let Inst{25} = 1; } @@ -674,24 +853,50 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), // Control Flow Instructions. // -let isReturn = 1, isTerminator = 1, isBarrier = 1 in - def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "bx", "\tlr", [(ARMretflag)]> { - let Inst{3-0} = 0b1110; - let Inst{7-4} = 0b0001; - let Inst{19-8} = 0b111111111111; - let Inst{27-20} = 0b00010010; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // ARMV4T and above + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", "\tlr", [(ARMretflag)]>, + Requires<[IsARM, HasV4T]> { + let Inst{3-0} = 0b1110; + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + // ARMV4 only + def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "mov", "\tpc, lr", [(ARMretflag)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-0} = 0b000000001110; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + // ARMV4T and above def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", - [(brind GPR:$dst)]> { + [(brind GPR:$dst)]>, + Requires<[IsARM, HasV4T]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; let Inst{31-28} = 0b1110; } + + // ARMV4 only + def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst", + [(brind GPR:$dst)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + let Inst{31-28} = 0b1110; + } } // FIXME: remove when we have a way to marking a MI with these properties. @@ -732,14 +937,26 @@ let isCall = 1, } // ARMv4T - def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsNotDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsNotDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // On Darwin R9 is call-clobbered. @@ -769,13 +986,26 @@ let isCall = 1, } // ARMv4T - def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } let isBranch = 1, isTerminator = 1 in { @@ -821,25 +1051,75 @@ let isBranch = 1, isTerminator = 1 in { } // isBarrier = 1 // FIXME: should be able to write a pattern for ARMBrcond, but can't use - // a two-value operand where a dag node expects two operands. :( + // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), IIC_Br, "b", "\t$target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0111; +} + +// Supervisor Call (Software Interrupt) -- for disassembly only +let isCall = 1 in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]>; +} + +// Store Return State is a system instruction -- for disassembly only +def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b110; // W = 1 +} + +def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b100; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b011; // W = 1 +} + +def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b001; // W = 0 +} + //===----------------------------------------------------------------------===// // Load / store Instructions. // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -848,7 +1128,7 @@ def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; -def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; @@ -907,6 +1187,51 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; + +// For disassembly only +def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +// For disassembly only +def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +} + +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. + +def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite } // Store @@ -915,8 +1240,8 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate -def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "strh", "\t$src, $addr", +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + IIC_iStorer, "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, @@ -931,47 +1256,87 @@ def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, am2offset:$offset), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; +// For disassembly only +def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base, $offset]!", + "$base = $base_wb", []>; + +// For disassembly only +def STRD_POST: AI3stdpo<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base], $offset", + "$base = $base_wb", []>; + +// STRT, STRBT, and STRHT are for disassembly only. + +def STRT : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRBT : AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strbt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRHT: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strht", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -999,7 +1364,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } -def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { let Inst{25} = 0; @@ -1012,7 +1377,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), +def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, @@ -1026,7 +1391,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, "movt", "\t$dst, $imm", [(set GPR:$dst, - (or (and GPR:$src, 0xffff), + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { let Inst{20} = 0; @@ -1045,7 +1410,7 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, // due to flag operands. let Defs = [CPSR] in { -def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, @@ -1069,7 +1434,11 @@ defm SXTAB : AI_bin_rrot<0b01101010, defm SXTAH : AI_bin_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -// TODO: SXT(A){B|H}16 +// For disassembly only +defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">; + +// For disassembly only +defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">; // Zero extenders @@ -1093,9 +1462,9 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; +// For disassembly only +defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">; -// TODO: UXT(A){B|H}16 def SBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), @@ -1131,13 +1500,13 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1171,14 +1540,14 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, let Uses = [CPSR] in { def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 1; } def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -1187,15 +1556,15 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), let Defs = [CPSR], Uses = [CPSR] in { def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -1216,6 +1585,126 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) +// ARM Arithmetic Instruction -- for disassembly only +// GPR:$dst = GPR:$a op GPR:$b +class AAI<bits<8> op27_20, bits<4> op7_4, string opc> + : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, + opc, "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = op27_20; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; +def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; +def QASX : AAI<0b01100010, 0b0011, "qasx">; +def QDADD : AAI<0b00010100, 0b0101, "qdadd">; +def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; +def QSAX : AAI<0b01100010, 0b0101, "qsax">; +def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; +def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; +def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; +def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">; +def UQASX : AAI<0b01100110, 0b0011, "uqasx">; +def UQSAX : AAI<0b01100110, 0b0101, "uqsax">; +def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">; +def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def SASX : AAI<0b01100001, 0b0011, "sasx">; +def SADD16 : AAI<0b01100001, 0b0001, "sadd16">; +def SADD8 : AAI<0b01100001, 0b1001, "sadd8">; +def SSAX : AAI<0b01100001, 0b0101, "ssax">; +def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">; +def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">; +def UASX : AAI<0b01100101, 0b0011, "uasx">; +def UADD16 : AAI<0b01100101, 0b0001, "uadd16">; +def UADD8 : AAI<0b01100101, 0b1001, "uadd8">; +def USAX : AAI<0b01100101, 0b0101, "usax">; +def USUB16 : AAI<0b01100101, 0b0111, "usub16">; +def USUB8 : AAI<0b01100101, 0b1111, "usub8">; + +// Signed/Unsigned halving add/subtract -- for disassembly only + +def SHASX : AAI<0b01100011, 0b0011, "shasx">; +def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">; +def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">; +def SHSAX : AAI<0b01100011, 0b0101, "shsax">; +def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">; +def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">; +def UHASX : AAI<0b01100111, 0b0011, "uhasx">; +def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">; +def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">; +def UHSAX : AAI<0b01100111, 0b0101, "uhsax">; +def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">; +def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + MulFrm /* for convenience */, NoItinerary, "usad8", + "\t$dst, $a, $b", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0001; +} +def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + MulFrm /* for convenience */, NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{7-4} = 0b0001; +} + +// Signed/Unsigned saturate -- for disassembly only + +def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b001; +} + +def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b101; +} + +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101010; + let Inst{7-4} = 0b0011; +} + +def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b001; +} + +def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b101; +} + +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101110; + let Inst{7-4} = 0b0011; +} //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -1239,6 +1728,17 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{6-0} = 0b0011111; } +// A8.6.18 BFI - Bitfield insert (Encoding A1) +// Added for disassembler with the pattern field purposely left blank. +def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$dst, $src, $imm", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 +} + def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { @@ -1251,7 +1751,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, let Inst{25} = 0; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { let Inst{25} = 1; @@ -1314,6 +1814,14 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), let Inst{15-12} = 0b1111; } +def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMUL32, "smmulr", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 + let Inst{15-12} = 0b1111; +} + def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, @@ -1321,6 +1829,12 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b0001; } +def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 +} def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1329,6 +1843,13 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b1101; } +def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1111; // R = 1 +} + multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1400,7 +1921,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]>, + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1446,8 +1967,87 @@ multiclass AI_smla<string opc, PatFrag opnode> { defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only +def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; +} + +def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; +} + +def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; +} + +def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; +} + +// Helper class for AI_smld -- for disassembly only +class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops, + InstrItinClass itin, string opc, string asm> + : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { + let Inst{4} = 1; + let Inst{5} = swap; + let Inst{6} = sub; + let Inst{7} = 0; + let Inst{21-20} = 0b00; + let Inst{22} = long; + let Inst{27-23} = 0b01110; +} + +multiclass AI_smld<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">; + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">; + + def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">; + + def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">; + +} + +defm SMLA : AI_smld<0, "smla">; +defm SMLS : AI_smld<1, "smls">; + +multiclass AI_sdml<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + +} + +defm SMUA : AI_sdml<0, "smua">; +defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -1505,7 +2105,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>, @@ -1522,7 +2122,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>, Requires<[IsARM, HasV6]> { @@ -1568,7 +2168,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. :( +// a two-value operand where a dag node expects two operands. :( def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1606,6 +2206,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB + // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1616,6 +2217,7 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB + // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1638,6 +2240,64 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), } } +// Helper class for multiclass MemB -- for disassembly only +class AMBI<string opc, string asm> + : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; +} + +multiclass MemB<bits<4> op7_4, string opc> { + + def st : AMBI<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : AMBI<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : AMBI<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : AMBI<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : AMBI<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : AMBI<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : AMBI<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm DMB : MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm DSB : MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def ISBsy : AMBI<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + let usesCustomInserter = 1 in { let Uses = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< @@ -1777,6 +2437,35 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), []>; } +// Clear-Exclusive is for disassembly only. +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; + let Inst{7-4} = 0b0001; +} + +// SWP/SWPB are deprecated in V6/V7 and for disassembly only. +let mayLoad = 1 in { +def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swp", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 0; // B = 0 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} + +def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swpb", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 1; // B = 1 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1827,7 +2516,7 @@ let Defs = // Two piece so_imms. let isReMaterializable = 1 in -def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, IIC_iMOVi, "mov", "\t$dst, $src", [(set GPR:$dst, so_imm2part:$src)]>, @@ -1852,7 +2541,7 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; @@ -1959,3 +2648,226 @@ include "ARMInstrVFP.td" // include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{4} = 0; +} + +def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{4} = 0; +} + +class ACI<dag oops, dag iops, string opc, string asm> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary, + opc, asm, "", [/* For disassembly only; pattern left blank */]> { + let Inst{27-25} = 0b110; +} + +multiclass LdStCop<bits<4> op31_28, bit load, string opc> { + + def _OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + opc, "\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), + opc, "\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def L_OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + opc, "l\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), + opc, "l\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } +} + +defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; +defm LDC2 : LdStCop<0b1111, 1, "ldc2">; +defm STC : LdStCop<{?,?,?,?}, 0, "stc">; +defm STC2 : LdStCop<0b1111, 0, "stc2">; + +def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; +} + +def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0100; +} + +def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0101; +} + +def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0101; +} + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0000; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRi : ABI<0b0011,(outs),(ins so_imm:$a), NoItinerary, "msr", "\tcpsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"msr","\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a),NoItinerary,"msr","\tspsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e2be7ba..3aa0810 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -83,11 +83,17 @@ def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; +def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -123,9 +129,7 @@ def h64imm : Operand<i64> { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -133,9 +137,7 @@ def VLDMD : NI<(outs), def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -144,10 +146,9 @@ def VLDMS : NI<(outs), */ // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), - IIC_fpLoadm, - "vldmia", "$addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { +def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, + "vldmia", "$addr, ${dst:dregpair}", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -156,10 +157,9 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), } // Use vstmia to store a Q register as a D register pair. -def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), - IIC_fpStorem, - "vstmia", "$addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), addrmode4:$addr)]> { +def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, + "vstmia", "$addr, ${src:dregpair}", + [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -191,6 +191,29 @@ def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +// These (dreg triple/quadruple) are for disassembly only. +class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; +def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; +def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; +//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; + +def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; +def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; +def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; +//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; + + let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) @@ -216,6 +239,16 @@ def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +// These (double-spaced dreg pair) are for disassembly only. +class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD2, + OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + +def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; +def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; +def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; + // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -285,105 +318,64 @@ def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2", []>; // vld2 to single-spaced registers. def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 0; -} -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 0; -} +def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } // vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; // vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { - let Inst{4} = 0; -} -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b00; -} -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b000; -} +def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } // vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; // vld4 to single-spaced registers. def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 0; -} -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 0; -} +def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } // vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // vld4 to double-spaced odd registers. -def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -418,6 +410,31 @@ def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq +// These (dreg triple/quadruple) are for disassembly only. +class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + OpcodeStr, Dt, + "\\{$src1, $src2, $src3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, OpcodeStr, Dt, + "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VST1d8T : VST1D3<0b0000, "vst1", "8">; +def VST1d16T : VST1D3<0b0100, "vst1", "16">; +def VST1d32T : VST1D3<0b1000, "vst1", "32">; +//def VST1d64T : VST1D3<0b1100, "vst1", "64">; + +def VST1d8Q : VST1D4<0b0000, "vst1", "8">; +def VST1d16Q : VST1D4<0b0100, "vst1", "16">; +def VST1d32Q : VST1D4<0b1000, "vst1", "32">; +//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; + + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) @@ -428,8 +445,7 @@ class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0011,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; def VST2d8 : VST2D<0b0000, "vst2", "8">; @@ -443,6 +459,16 @@ def VST2q8 : VST2Q<0b0000, "vst2", "8">; def VST2q16 : VST2Q<0b0100, "vst2", "16">; def VST2q32 : VST2Q<0b1000, "vst2", "32">; +// These (double-spaced dreg pair) are for disassembly only. +class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b1001, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; +def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; +def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; + // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0100,op7_4, (outs), @@ -476,14 +502,12 @@ def VST3q32b : VST3WB<0b1000, "vst3", "32">; class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "$addr.addr = $wb", []>; def VST4d8 : VST4D<0b0000, "vst4", "8">; @@ -511,104 +535,63 @@ def VST4q32b : VST4WB<0b1000, "vst4", "32">; // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, - OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", - "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + "", []>; // vst2 to single-spaced registers. def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 0; -} -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 0; -} +def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; // vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { - let Inst{4} = 0; -} -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b00; -} -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b000; -} +def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } // vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", - "", []>; + "", []>; // vst4 to single-spaced registers. def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 0; -} -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 0; -} +def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } // vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } // vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 @@ -656,34 +639,26 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{ // Instruction Classes //===----------------------------------------------------------------------===// -// Basic 2-register operations, both double- and quad-register. +// Basic 2-register operations: single-, double- and quad-register. +class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; -// Basic 2-register operations, scalar single-precision. -class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), - IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, @@ -700,21 +675,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register intrinsics, scalar single-precision -class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, - OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDIntsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Narrow 2-register intrinsics. class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -742,15 +702,22 @@ class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2), - (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, Dt, "$dst1, $dst2", + (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; -// Basic 3-register operations, both double- and quad-register. +// Basic 3-register operations: single-, double- and quad-register. +class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { + let isCommutable = Commutable; +} + class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -763,9 +730,9 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{ let isCommutable = Commutable; } class N3VDSL<bits<2> op21_20, bits<4> op11_8, @@ -776,27 +743,23 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{ let isCommutable = 0; } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16D, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { let isCommutable = 0; } class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -805,12 +768,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, } class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{ let isCommutable = Commutable; } class N3VQSL<bits<2> op21_20, bits<4> op11_8, @@ -825,13 +787,11 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, imm:$lane)))))]> { let isCommutable = 0; } -class N3VQSL16<bits<2> op21_20, bits<4> op11_8, - string OpcodeStr, string Dt, +class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16Q, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -839,27 +799,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, let isCommutable = 0; } -// Basic 3-register operations, scalar single-precision -class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { - let isCommutable = Commutable; -} -class N3VDsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -891,8 +834,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -924,7 +866,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } -// Multiply-Add/Sub operations, both double- and quad-register. +// Multiply-Add/Sub operations: single-, double- and quad-register. +class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; + class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> @@ -976,8 +926,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), - imm:$lane)))))))]>; + (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), + imm:$lane)))))))]>; class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, @@ -989,25 +939,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_8:$src3), - imm:$lane)))))))]>; - -// Multiply-Add/Sub operations, scalar single-precision -class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, - OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; - -class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + (ResTy (NEONvduplane (OpTy DPR_8:$src3), + imm:$lane)))))))]>; // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. @@ -1050,9 +983,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (OpTy DPR:$src2), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))]>; -class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, @@ -1063,7 +996,6 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti (OpTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))]>; - // Narrowing 3-register intrinsics. class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, @@ -1095,9 +1027,9 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]>; -class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", @@ -1249,6 +1181,45 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // S = single int (32 bit) elements // D = double int (64 bit) elements +// Neon 2-register vector operations -- for disassembly only. + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, string opc, string Dt, + string asm> { + // 64-bit vector types. + def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } + + // 128-bit vector types. + def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } +} + // Neon 3-register vector operations. // First with only element sizes of 8, 16 and 32 bits: @@ -1262,22 +1233,22 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "16"), - v4i16, v4i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, OpNode, Commutable>; def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "32"), - v2i32, v2i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, OpNode, Commutable>; // 128-bit vector types. def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "8"), - v16i8, v16i8, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, OpNode, Commutable>; def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), - v8i16, v8i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, OpNode, Commutable>; def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), - v4i32, v4i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, OpNode, Commutable>; } multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { @@ -1372,7 +1343,7 @@ multiclass N3VIntSL_HS<bits<4> op11_8, def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; } @@ -1386,8 +1357,8 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "8"), - v8i8, v8i8, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp, Commutable>; def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>; @@ -1402,11 +1373,11 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "64"), - v1i64, v1i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, IntOp, Commutable>; def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "64"), - v2i64, v2i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp, Commutable>; } @@ -1511,9 +1482,11 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, + mul, ShOp>; def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, + mul, ShOp>; } // Neon 3-argument intrinsics, @@ -1522,19 +1495,19 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, - OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1576,17 +1549,17 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; // 128-bit vector types. def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; } @@ -1846,29 +1819,31 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", - v2f32, v2f32, fmul, 1>; + v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", - v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; + v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, + v2f32, fmul>; + def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (mul (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), (v4f32 (VMULslfq (v4f32 QPR:$src1), (v2f32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half @@ -1883,14 +1858,14 @@ def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half @@ -1905,14 +1880,14 @@ def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) @@ -1950,30 +1925,28 @@ def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (add (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLAslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) @@ -2003,30 +1976,27 @@ def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (sub (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), - (v4f32 (VMLSslfq (v4f32 QPR:$src1), - (v4f32 QPR:$src2), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) @@ -2088,6 +2058,10 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +// For disassembly only. +defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", + "$dst, $src, #0">; + // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; @@ -2097,6 +2071,13 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +// For disassembly only. +defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", + "$dst, $src, #0">; + // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; @@ -2106,6 +2087,13 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +// For disassembly only. +defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", + "$dst, $src, #0">; + // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; @@ -2247,9 +2235,9 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; @@ -2257,9 +2245,9 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; @@ -2401,16 +2389,17 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", + NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -2418,14 +2407,14 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -2438,10 +2427,10 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "s", int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; @@ -2508,7 +2497,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) -def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; @@ -2547,6 +2536,14 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; +// Vector Swap -- for disassembly only. +def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; +def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; + // Vector Move Operations. // VMOV : Vector Move (Register) @@ -2678,10 +2675,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane))>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; @@ -2849,11 +2846,13 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (i64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (f64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; // VMOVN : Vector Narrowing Move @@ -3092,70 +3091,110 @@ def VTBX4 // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// +class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>; -def : N3VDsPat<fadd, VADDfd_sfp>; +def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; +def : N3VSPat<fadd, VADDfd_sfp>; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>; -def : N3VDsPat<fsub, VSUBfd_sfp>; +def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; +def : N3VSPat<fsub, VSUBfd_sfp>; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>; -def : N3VDsPat<fmul, VMULfd_sfp>; +def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; +def : N3VSPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so // we want to avoid them for now. e.g., alternating vmla/vadd instructions. //let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>; -//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; +//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", +// v2f32, fmul, fadd>; +//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>; -//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; +//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", +// v2f32, fmul, fsub>; +//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in -def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def : N2VDIntsPat<fabs, VABSfd_sfp>; +def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vabs", "f32", "$dst, $src", "", []>; +def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>; // Vector Negate used for single-precision FP let neverHasSideEffects = 1 in -def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VDIntsPat<fneg, VNEGf32d_sfp>; +def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vneg", "f32", "$dst, $src", "", []>; +def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; + +// Vector Maximum used for single-precision FP +let neverHasSideEffects = 1 in +def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmax", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmax, VMAXfd_sfp>; + +// Vector Minimum used for single-precision FP +let neverHasSideEffects = 1 in +def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmin", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmin, VMINfd_sfp>; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; +def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; +def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; +def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; +def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1dcea26..786dd65 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -120,7 +120,10 @@ def t_addrmode_sp : Operand<i32>, // Miscellaneous Instructions. // -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, "@ tADJCALLSTACKUP $amt1", @@ -132,6 +135,76 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } +def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00000000; +} + +def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00010000; +} + +def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00100000; +} + +def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00110000; +} + +def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b01000000; +} + +def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 1; +} + +def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 0; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode ==> don't care +// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr) +// opt{8-6} = AIF from Inst{2-0} +// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable +// +// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM +// CPS which has more options. +def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + T1Misc<0b0110011>; + // For both thumb1 and thumb2. let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, @@ -200,7 +273,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{6-3} = 0b1110; // Rm = lr } // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>, + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>, T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25 } @@ -228,20 +301,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // Also used for Thumb2 - def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>, @@ -249,7 +322,7 @@ let isCall = 1, // ARMv4T def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsNotDarwin]>; @@ -263,20 +336,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>, @@ -284,7 +357,7 @@ let isCall = 1, // ARMv4T def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsDarwin]>; @@ -299,7 +372,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in - def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, + def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, "bl\t$target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), @@ -332,16 +405,34 @@ let isBranch = 1, isTerminator = 1 in { T1Misc<{1,0,?,1,?,?,?}>; } +// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only +// A8.6.16 B: Encoding T1 +// If Inst{11-8} == 0b1111 then SEE SVC +let isCall = 1 in { +def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, + Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1111; +} +} + +// A8.6.16 B: Encoding T1 -- for disassembly only +// If Inst{11-8} == 0b1110 then UNDEFINED +def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; +} + //===----------------------------------------------------------------------===// // Load Store Instructions. // -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in -def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +let canFoldAsLoad = 1, isReMaterializable = 1 in +def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; -def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", []>, T1LdSt4Imm<{1,?,?}>; @@ -391,15 +482,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -644,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs", + "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, T1DataProcessing<0b1101>; @@ -761,7 +851,7 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, T1Misc<{0,0,1,0,1,0,?}>; -// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 55c7aa2..6241766 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -13,7 +13,7 @@ // IT block predicate field def it_pred : Operand<i32> { - let PrintMethod = "printPredicateOperand"; + let PrintMethod = "printMandatoryPredicateOperand"; } // IT block condition mask @@ -53,10 +53,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; + return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; }]>; -// t2_so_imm_not - Match an immediate that is a complement +// t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ @@ -114,13 +114,13 @@ def imm0_4095 : Operand<i32>, return (uint32_t)N->getZExtValue() < 4096; }]>; -def imm0_4095_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)(-N->getZExtValue()) < 4096; +}], imm_neg_XFORM>; def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; -}], imm_neg_XFORM>; +}], imm_neg_XFORM>; // Define Thumb2 specific addressing modes. @@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_imm8 := reg - imm8 +// t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; @@ -208,7 +208,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a // binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, +multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, @@ -368,15 +368,16 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns -/// for a binary operation that produces a value and use and define the carry +/// for a binary operation that produces a value and use the carry /// bit. It's not predicable. let Uses = [CPSR] in { -multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -387,7 +388,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -401,19 +402,23 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; let Inst{20} = 0; // The S bit. } - // Carry setting variants +} + +// Carry setting variants +let Defs = [CPSR] in { +multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm - def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + opc, "\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -421,11 +426,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{15} = 0; } // register - def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -436,11 +440,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{5-4} = 0b00; // type } // shifted register - def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -448,6 +451,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm } } } +} /// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. let Defs = [CPSR] in { @@ -626,19 +630,6 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { } } -/// T2I_picld - Defines the PIC load pattern. -class T2I_picld<string opc, PatFrag opnode> : - T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi, - !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr", - [(set GPR:$dst, (opnode addrmodepc:$addr))]>; - -/// T2I_picst - Defines the PIC store pattern. -class T2I_picst<string opc, PatFrag opnode> : - T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer, - !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr", - [(opnode GPR:$src, addrmodepc:$addr)]>; - - /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -666,6 +657,57 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// SXTB16 and UXTB16 do not need the .w qualifier. +multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + +// DO variant - disassembly only, no pattern + +multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -692,6 +734,29 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// DO variant - disassembly only, no pattern + +multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + opc, "\t$dst, $LHS, $RHS", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -734,7 +799,7 @@ def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } -def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -787,6 +852,25 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } +// Signed and unsigned division, for disassembly only +def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "sdiv", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011100; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + +def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "udiv", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011101; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -803,7 +887,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -925,10 +1009,32 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), []>; } +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are +// for disassembly only. +// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 +class T2IldT<bit signed, bits<2> type, string opc> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$dst, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 1; // load + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW. +} + +def t2LDRT : T2IldT<0, 0b10, "ldrt">; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; +def t2LDRHT : T2IldT<0, 0b01, "ldrht">; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; + // Store -defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in @@ -979,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; +// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly +// only. +// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 +class T2IstT<bits<2> type, string opc> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, + "\t$src, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = 0; // not signed + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 0; // store + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW +} + +def t2STRT : T2IstT<0b10, "strt">; +def t2STRBT : T2IstT<0b00, "strbt">; +def t2STRHT : T2IstT<0b01, "strht">; // FIXME: ldrd / strd pre / post variants +// T2Ipl (Preload Data/Instruction) signals the memory system of possible future +// data/instruction access. These are for disassembly only. +multiclass T2Ipl<bit instr, bit write, string opc> { + + def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 1; // U = 1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + } + + def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // U = 0 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-8} = 0b1100; + } + + // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc. + def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + "\t[pc, ${imm:negzero}]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = ?; // add = (U == 1) + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; // Rn = 0b1111 + let Inst{15-12} = 0b1111; + } + + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + "\t[$base, $a]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + let Inst{5-4} = 0b00; // no shift is applied + } + + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + "\t[$base, $a, lsl $shamt]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } +} + +defm t2PLD : T2Ipl<0, 0, "pld">; +defm t2PLDW : T2Ipl<0, 1, "pldw">; +defm t2PLI : T2Ipl<1, 0, "pli">; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -989,7 +1184,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1001,7 +1196,7 @@ def t2LDM : T2XI<(outs), let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1074,13 +1269,15 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; +defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; +defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">; -// TODO: SXT(A){B|H}16 +// TODO: SXT(A){B|H}16 - done for disassembly only // Zero extenders @@ -1089,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), @@ -1101,6 +1298,7 @@ defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1119,9 +1317,13 @@ defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_is <0b1110, "rsb", @@ -1138,6 +1340,155 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +// Select Bytes -- for disassembly only + +def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", + "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b010; + let Inst{23} = 0b1; + let Inst{22-20} = 0b010; + let Inst{15-12} = 0b1111; + let Inst{7} = 0b1; + let Inst{6-4} = 0b000; +} + +// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) +// And Miscellaneous operations -- for disassembly only +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc> + : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0101; + let Inst{22-20} = op22_20; + let Inst{15-12} = 0b1111; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; +def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; +def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; +def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; +def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; +def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; +def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; +def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; +def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; +def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; +def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; +def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; +def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; +def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; +def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; +def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; +def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; +def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; +def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; +def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; +def t2USAX : T2I_pam<0b110, 0b0100, "usax">; +def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; +def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; + +// Signed/Unsigned halving add/subtract -- for disassembly only + +def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; +def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; +def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; +def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; +def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; +def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; +def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; +def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; +def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; +def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; +def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; +def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, "usad8", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>; + +// Signed/Unsigned saturate -- for disassembly only + +def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} + +def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} //===----------------------------------------------------------------------===// // Shift and rotate Instructions. @@ -1342,6 +1693,8 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, } } // neverHasSideEffects +// Rounding variants of the below included for disassembly only + // Most significant word multiply def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", @@ -1353,6 +1706,15 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + "smmulr", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { @@ -1363,6 +1725,14 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlar", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1374,6 +1744,15 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlsr", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b110; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + multiclass T2I_smul<string opc, PatFrag opnode> { def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1466,7 +1845,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1490,7 +1869,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1502,7 +1881,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + (sext_inreg GPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1514,7 +1893,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16))))]> { + (sra GPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1527,16 +1906,70 @@ multiclass T2I_smla<string opc, PatFrag opnode> { defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD - +// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; + +// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// These are for disassembly only. + +def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", + "\t$dst, $a, $b, $acc", []>; +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", + "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // -class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-22} = 0b01010; @@ -1572,7 +2005,7 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, (shl GPR:$src, (i32 8))), i16))]>; def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]> { @@ -1590,7 +2023,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]> { @@ -1643,7 +2076,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. :( +// a two-value operand where a dag node expects two operands. :( def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1723,6 +2156,66 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), } } +// Helper class for multiclass T2MemB -- for disassembly only +class T2I_memb<string opc, string asm> + : T2I<(outs), (ins), NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +multiclass T2MemB<bits<4> op7_4, string opc> { + + def st : T2I_memb<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : T2I_memb<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : T2I_memb<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : T2I_memb<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : T2I_memb<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : T2I_memb<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : T2I_memb<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm t2DMB : T2MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm t2DSB : T2MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def t2ISBsy : T2I_memb<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> @@ -1789,6 +2282,16 @@ def t2STREXD : T2I_strex<0b11, (outs GPR:$success), {?, ?, ?, ?}>; } +// Clear-Exclusive is for disassembly only. +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{7-4} = 0b0010; +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1906,6 +2409,24 @@ def t2TBH : let Inst{15-8} = 0b11110000; let Inst{7-4} = 0b0001; // H form } + +// Generic versions of the above two instructions, for disassembly only + +def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbb", "\t[$a, $b]", []>{ + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0000; // B form +} + +def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbh", "\t[$a, $b, lsl #1]", []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0001; // H form +} } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier @@ -1931,6 +2452,119 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), let Inst{15-8} = 0b10111111; } +// Branch and Exchange Jazelle -- for disassembly only +// Rm = Inst{19-16} +def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-20} = 0b111100; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-20} = 0b111010; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// A6.3.4 Branches and miscellaneous control +// Table A6-14 Change Processor State, and hint instructions +// Helper class for disassembly only. +class T2I_hint<bits<8> op7_0, string opc, string asm> + : T2I<(outs), (ins), NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]> { + let Inst{31-20} = 0xf3a; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{10-8} = 0b000; + let Inst{7-0} = op7_0; +} + +def t2NOP : T2I_hint<0b00000000, "nop", ".w">; +def t2YIELD : T2I_hint<0b00000001, "yield", ".w">; +def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; +def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; +def t2SEV : T2I_hint<0b00000100, "sev", ".w">; + +def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-20} = 0xf3a; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{10-8} = 0b000; + let Inst{7-4} = 0b1111; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +// Option = Inst{19-16} +def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26-20} = 0b1111111; + let Inst{15-12} = 0b1000; +} + +// Store Return State is a system instruction -- for disassembly only +def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000010; // W = 1 +} + +def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000000; // W = 0 +} + +def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011010; // W = 1 +} + +def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011000; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000011; // W = 1 +} + +def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0000001; // W = 0 +} + +def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011011; // W = 1 +} + +def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0011001; // W = 0 +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1970,9 +2604,59 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +// Rd = Instr{11-8} +def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Rd = Instr{11-8} +def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSR : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSRsys : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 365e1e3..7c117ed 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>, // Load / store Instructions. // -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; @@ -412,6 +412,101 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, let Inst{7} = 0; // Z bit } +// Convert between floating-point and fixed-point +// Data type for fixed-point naming convention: +// S16 (U=0, sx=0) -> SH +// U16 (U=1, sx=0) -> UH +// S32 (U=0, sx=1) -> SL +// U32 (U=1, sx=1) -> UL + +let Constraints = "$a = $dst" in { + +// FP to Fixed-Point: + +def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +// Fixed-Point to FP: + +def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +} // End of 'let Constraints = "$src = $dst" in' + //===----------------------------------------------------------------------===// // FP FMA Operations. // diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index bef5a06..6db6ba4 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -48,7 +48,13 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // write our own wrapper, which does things our way, so we have complete // control over register saving and restoring. extern "C" { -#if defined(__arm__) + // We don't need this on Android (generally on hand-held devices). This + // function is for the purpose of supporting "lazy symbol lookup" (lookup + // undefined symbol at runtime) (Actually, if you tried to remove the + // !defined(ANDROID) guard, you'll get compilation error since Android's + // toolchain choose armv5te as its CPU architecture which does not support + // instruction 'stmdb' and 'ldmia' within the function) +#if defined(__arm__) && !defined(ANDROID) void ARMCompilationCallback(); asm( ".text\n" @@ -60,7 +66,7 @@ extern "C" { // whole compilation callback doesn't exist as far as the caller is // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. @@ -83,7 +89,7 @@ extern "C" { // 6-20 | D0..D7 | Saved VFP registers // +--------+ // -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif @@ -318,6 +324,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } } } } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b78b95b..19f1e3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. - if (NewOffset == Offset + (int)Size && + if (Reg != ARM::SP && + NewOffset == Offset + (int)Size && ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { Offset += Size; PRegNum = RegNum; @@ -747,11 +748,24 @@ static bool isMemoryOp(const MachineInstr *MI) { if (MMO->isVolatile()) return false; - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not. + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. if (MMO->getAlignment() < 4) return false; } + // str <undef> could probably be eliminated entirely, but for now we just want + // to avoid making a mess of it. + // FIXME: Use str <undef> as a wildcard to enable better stm folding. + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && + MI->getOperand(0).isUndef()) + return false; + + // Likewise don't mess with references to undefined addresses. + if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && + MI->getOperand(1).isUndef()) + return false; + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. - reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 426862c..622034b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,7 +33,7 @@ UseMOVT("arm-use-movt", ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) - : ARMArchVersion(V4T) + : ARMArchVersion(V4) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) , IsThumb(isT) @@ -54,6 +54,11 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, // Parse features string. CPUString = ParseSubtargetFeatures(FS, CPUString); + // When no arch is specified either by CPU or by attributes, make the default + // ARMv4T. + if (CPUString == "generic" && (FS.empty() || FS == "generic")) + ARMArchVersion = V4T; + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.length(); @@ -68,25 +73,28 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, } if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer > '4' && SubVer <= '9') { - if (SubVer >= '7') { - ARMArchVersion = V7A; - } else if (SubVer == '6') { - ARMArchVersion = V6; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchVersion = V6T2; - } else if (SubVer == '5') { - ARMArchVersion = V5T; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchVersion = V5TE; - } - if (ARMArchVersion >= V6T2) - ThumbMode = Thumb2; + if (SubVer >= '7' && SubVer <= '9') { + ARMArchVersion = V7A; + } else if (SubVer == '6') { + ARMArchVersion = V6; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') + ARMArchVersion = V6T2; + } else if (SubVer == '5') { + ARMArchVersion = V5T; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchVersion = V5TE; + } else if (SubVer == '4') { + if (Len >= Idx+2 && TT[Idx+1] == 't') + ARMArchVersion = V4T; + else + ARMArchVersion = V4; } } // Thumb2 implies at least V6T2. - if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2) + if (ARMArchVersion >= V6T2) + ThumbMode = Thumb2; + else if (ThumbMode >= Thumb2) ARMArchVersion = V6T2; if (Len >= 10) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3f06b7b..6980851 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A }; enum ARMFPEnum { @@ -38,7 +38,7 @@ protected: Thumb2 }; - /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE, + /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, /// V6, V6T2, V7A. ARMArchEnum ARMArchVersion; diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 9703403..a488c0a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H #define LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionELF.h" namespace llvm { @@ -24,7 +24,7 @@ namespace llvm { if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, SectionKind::getDataRel()); StaticDtorSection = diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk new file mode 100644 index 0000000..ea796af --- /dev/null +++ b/lib/Target/ARM/Android.mk @@ -0,0 +1,49 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterInfo.h.inc \ + ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc \ + ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc \ + ARMGenDAGISel.inc \ + ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc \ + ARMGenCallingConv.inc + +LOCAL_SRC_FILES := \ + ARMBaseInstrInfo.cpp \ + ARMBaseRegisterInfo.cpp \ + ARMCodeEmitter.cpp \ + ARMConstantIslandPass.cpp \ + ARMConstantPoolValue.cpp \ + ARMExpandPseudoInsts.cpp \ + ARMISelDAGToDAG.cpp \ + ARMISelLowering.cpp \ + ARMInstrInfo.cpp \ + ARMJITInfo.cpp \ + ARMLoadStoreOptimizer.cpp \ + ARMMCAsmInfo.cpp \ + ARMRegisterInfo.cpp \ + ARMSubtarget.cpp \ + ARMTargetMachine.cpp \ + NEONMoveFix.cpp \ + NEONPreAllocPass.cpp \ + Thumb1InstrInfo.cpp \ + Thumb1RegisterInfo.cpp \ + Thumb2ITBlockPass.cpp \ + Thumb2InstrInfo.cpp \ + Thumb2RegisterInfo.cpp \ + Thumb2SizeReduction.cpp + +LOCAL_MODULE:= libLLVMARMCodeGen + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 0a75c09..d6d595c 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -37,7 +38,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -122,6 +122,7 @@ namespace { void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); void printPredicateOperand(const MachineInstr *MI, int OpNum); + void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum); void printSBitModifierOperand(const MachineInstr *MI, int OpNum); void printPCLabel(const MachineInstr *MI, int OpNum); void printRegisterList(const MachineInstr *MI, int OpNum); @@ -786,6 +787,12 @@ void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) { O << ARMCondCodeToString(CC); } +void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI, + int OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ unsigned Reg = MI->getOperand(OpNum).getReg(); if (Reg) { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index d7d8e09..a2084b0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -325,6 +325,12 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { O << ARMCondCodeToString(CC); } +void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, + unsigned OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){ if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 23a7f05..b7964c9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -71,6 +71,7 @@ public: void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} void printPredicateOperand(const MCInst *MI, unsigned OpNum); + void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); void printRegisterList(const MCInst *MI, unsigned OpNum); void printCPInstOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9efb5a1..57b65cf 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -10,6 +10,8 @@ Reimplement 'select' in terms of 'SEL'. * Implement pre/post increment support. (e.g. PR935) * Implement smarter constant generation for binops with large immediates. +A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX + //===---------------------------------------------------------------------===// Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the diff --git a/lib/Target/ARM/TargetInfo/Android.mk b/lib/Target/ARM/TargetInfo/Android.mk new file mode 100644 index 0000000..c1998a1 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterNames.inc \ + ARMGenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. + +LOCAL_SRC_FILES := \ + ARMTargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. + +LOCAL_MODULE:= libLLVMARMInfo + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index d6630ce..163d1e9 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -450,9 +450,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (hasFP(MF)) { - assert(SPAdj == 0 && "Unexpected"); - // There is alloca()'s in this function, must reference off the frame + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame // pointer instead. FrameReg = getFrameRegister(MF); Offset -= AFI->getFramePtrSpillOffset(); @@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); + if (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) + return true; + else if (MI->getOpcode() == ARM::tPOP) { + // The first three operands are predicates and such. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; } void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, @@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = getCalleeSavedRegs(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, } if (VARegSaveSize) { + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. @@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); + // erase the old tBX_RET instruction MBB.erase(MBBI); } } diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td index 38ada69..bde8819 100644 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// def RetCC_Alpha : CallingConv<[ // i64 is returned in register R0 - CCIfType<[i64], CCAssignToReg<[R0]>>, + // R1 is an llvm extension, I don't know what gcc does + CCIfType<[i64], CCAssignToReg<[R0,R1]>>, // f32 / f64 are returned in F0/F1 CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index eaefef9..5303d85 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -64,7 +64,7 @@ namespace { /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are /// in checking mode. If LHS is null, we assume that the mask has already /// been validated before. - uint64_t get_zapImm(SDValue LHS, uint64_t Constant) { + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { uint64_t BitsToCheck = 0; unsigned Result = 0; for (unsigned i = 0; i != 8; ++i) { @@ -159,10 +159,6 @@ namespace { // target-specific node if it hasn't already been changed. SDNode *Select(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Alpha DAG->DAG Pattern Instruction Selection"; } @@ -222,20 +218,11 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode(); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void AlphaDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. - } DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 0bbe567..5d8310e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0)); + PseudoSourceValue::getStack(), 0, + false, false, 0)); } } @@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); } @@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); } //Set up a token factor with all the stack traffic @@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, NULL, 0, MVT::i32); + Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, - MVT::i32); + MVT::i32, false, false, 0); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue Result; if (Op.getValueType() == MVT::i32) Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - NULL, 0, MVT::i32); + NULL, 0, MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + false, false, 0); return Result; } case ISD::VACOPY: { @@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + false, false, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, NULL,0, MVT::i32); + NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + false, false, 0); } case ISD::VASTART: { SDValue Chain = Op.getOperand(0); @@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, + false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), - SA2, NULL, 0, MVT::i32); + SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), @@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); } diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 8917e86..341c4a7 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -92,7 +92,7 @@ def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended ((int64_t)N->getZExtValue() << 32) >> 32; }], SExt16>; -def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{ +def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!RHS) return 0; uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); @@ -602,9 +602,9 @@ def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle count def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), (WMB)>; -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), (MB)>; //Basic Floating point ops diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 64bdd62..ba662fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } @@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } } } unsigned AlphaRegisterInfo::getRARegister() const { - llvm_unreachable("What is the return address register"); - return 0; + return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Android.mk b/lib/Target/Android.mk new file mode 100644 index 0000000..8bf4340 --- /dev/null +++ b/lib/Target/Android.mk @@ -0,0 +1,38 @@ +LOCAL_PATH:= $(call my-dir) + +target_SRC_FILES := \ + Mangler.cpp \ + SubtargetFeature.cpp \ + Target.cpp \ + TargetAsmLexer.cpp \ + TargetData.cpp \ + TargetELFWriterInfo.cpp \ + TargetFrameInfo.cpp \ + TargetInstrInfo.cpp \ + TargetIntrinsicInfo.cpp \ + TargetLoweringObjectFile.cpp \ + TargetMachine.cpp \ + TargetRegisterInfo.cpp \ + TargetSubtarget.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 2c9cc60..c8d71aa 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -41,7 +41,7 @@ namespace { BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel) {} - virtual void InstructionSelect(); + virtual void PostprocessISelDAG(); virtual const char *getPassName() const { return "Blackfin DAG->DAG Pattern Instruction Selection"; @@ -72,13 +72,7 @@ FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM, return new BlackfinDAGToDAGISel(TM, OptLevel); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void BlackfinDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - DEBUG(errs() << "Selected selection DAG before regclass fixup:\n"); - DEBUG(CurDAG->dump()); +void BlackfinDAGToDAGISel::PostprocessISelDAG() { FixRegisterClasses(*CurDAG); } diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 269707a..5ce2013 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -329,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, PseudoSourceValue::getStack(), - Offset)); + Offset, false, false, 0)); } } diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index fd4c4e7..10f873f 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -49,7 +49,6 @@ #include "llvm/System/Host.h" #include "llvm/Config/config.h" #include <algorithm> -#include <sstream> using namespace llvm; extern "C" void LLVMInitializeCBackendTarget() { @@ -153,26 +152,16 @@ namespace { return false; } - raw_ostream &printType(formatted_raw_ostream &Out, - const Type *Ty, + raw_ostream &printType(raw_ostream &Out, const Type *Ty, bool isSigned = false, const std::string &VariableName = "", bool IgnoreName = false, const AttrListPtr &PAL = AttrListPtr()); - std::ostream &printType(std::ostream &Out, const Type *Ty, - bool isSigned = false, - const std::string &VariableName = "", - bool IgnoreName = false, - const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(formatted_raw_ostream &Out, - const Type *Ty, - bool isSigned, - const std::string &NameSoFar = ""); - std::ostream &printSimpleType(std::ostream &Out, const Type *Ty, - bool isSigned, + raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + bool isSigned, const std::string &NameSoFar = ""); - void printStructReturnPointerFunctionType(formatted_raw_ostream &Out, + void printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *Ty); @@ -385,8 +374,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { // If this isn't a struct or array type, remove it from our set of types // to name. This simplifies emission later. - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) && - !isa<ArrayType>(I->second)) { + if (!I->second->isStructTy() && !I->second->isOpaqueTy() && + !I->second->isArrayTy()) { TST.remove(I); } else { // If this is not used, remove it from the symbol table. @@ -405,7 +394,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { unsigned RenameCounter = 0; for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end(); I != E; ++I) - if (isa<StructType>(*I) || isa<ArrayType>(*I)) { + if ((*I)->isStructTy() || (*I)->isArrayTy()) { while (M.addTypeName("unnamed"+utostr(RenameCounter), *I)) ++RenameCounter; Changed = true; @@ -454,11 +443,12 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { /// printStructReturnPointerFunctionType - This is like printType for a struct /// return type, except, instead of printing the type as void (*)(Struct*, ...) /// print it as "Struct (*)(...)", for struct return functions. -void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, +void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *TheTy) { const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (*) ("; bool PrintedType = false; @@ -470,7 +460,7 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -484,63 +474,14 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); } raw_ostream & -CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, - bool isSigned, +CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && - "Invalid type for printSimpleType"); - switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void " << NameSoFar; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool " << NameSoFar; - else if (NumBits <= 8) - return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; - else if (NumBits <= 16) - return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; - else if (NumBits <= 32) - return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; - else if (NumBits <= 64) - return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; - } - } - case Type::FloatTyID: return Out << "float " << NameSoFar; - case Type::DoubleTyID: return Out << "double " << NameSoFar; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double " << NameSoFar; - - case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); - return printSimpleType(Out, VTy->getElementType(), isSigned, - " __attribute__((vector_size(" + - utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); - } - - default: -#ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; -#endif - llvm_unreachable(0); - } -} - -std::ostream & -CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, - const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -587,120 +528,16 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printType(formatted_raw_ostream &Out, - const Type *Ty, +raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { - printSimpleType(Out, Ty, isSigned, NameSoFar); - return Out; - } - - // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { - std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; - } - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; - FunctionInnards << " (" << NameSoFar << ") ("; - unsigned Idx = 1; - for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I) { - const Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - if (I != FTy->param_begin()) - FunctionInnards << ", "; - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - ++Idx; - } - if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; - } else if (!FTy->getNumParams()) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); - printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); - return Out; - } - case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); - Out << NameSoFar + " {\n"; - unsigned Idx = 0; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - Out << " "; - printType(Out, *I, false, "field" + utostr(Idx++)); - Out << ";\n"; - } - Out << '}'; - if (STy->isPacked()) - Out << " __attribute__ ((packed))"; - return Out; - } - - case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); - std::string ptrName = "*" + NameSoFar; - - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) - ptrName = "(" + ptrName + ")"; - - if (!PAL.isEmpty()) - // Must be a function ptr cast! - return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); - return printType(Out, PTy->getElementType(), false, ptrName); - } - - case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); - unsigned NumElements = ATy->getNumElements(); - if (NumElements == 0) NumElements = 1; - // Arrays are wrapped in structs to allow them to have normal - // value semantics (avoiding the array "decay"). - Out << NameSoFar << " { "; - printType(Out, ATy->getElementType(), false, - "array[" + utostr(NumElements) + "]"); - return Out << "; }"; - } - - case Type::OpaqueTyID: { - std::string TyName = "struct opaque_" + itostr(OpaqueCounter++); - assert(TypeNames.find(Ty) == TypeNames.end()); - TypeNames[Ty] = TyName; - return Out << TyName << ' ' << NameSoFar; - } - default: - llvm_unreachable("Unhandled case in getTypeProps!"); - } - - return Out; -} - -// Pass the Type* and the variable name and this prints out the variable -// declaration. -// -std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, - bool isSigned, const std::string &NameSoFar, - bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { + if (!IgnoreName || Ty->isOpaqueTy()) { std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; } @@ -708,14 +545,15 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (" << NameSoFar << ") ("; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } if (I != FTy->param_begin()) @@ -731,9 +569,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); return Out; } case Type::StructTyID: { @@ -756,8 +593,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, const PointerType *PTy = cast<PointerType>(Ty); std::string ptrName = "*" + NameSoFar; - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) + if (PTy->getElementType()->isArrayTy() || + PTy->getElementType()->isVectorTy()) ptrName = "(" + ptrName + ")"; if (!PAL.isEmpty()) @@ -1144,7 +981,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << "(("; printType(Out, CPV->getType()); // sign doesn't matter Out << ")/*UNDEF*/"; - if (!isa<VectorType>(CPV->getType())) { + if (!CPV->getType()->isVectorTy()) { Out << "0)"; } else { Out << "{})"; @@ -1396,7 +1233,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { } if (NeedsExplicitCast) { Out << "(("; - if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext())) + if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) printSimpleType(Out, Ty, TypeIsSigned); else printType(Out, Ty); // not integer, sign doesn't matter @@ -1497,7 +1334,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. const Type *Ty = I.getType(); - if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) && + if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && @@ -1660,7 +1497,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { // If the operand was a pointer, convert to a large integer type. const Type* OpTy = Operand->getType(); - if (isa<PointerType>(OpTy)) + if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); Out << "(("; @@ -2102,10 +1939,10 @@ bool CWriter::doInitialization(Module &M) { // complete. If the value is an aggregate, print out { 0 }, and let // the compiler figure out the rest of the zeros. Out << " = " ; - if (isa<StructType>(I->getInitializer()->getType()) || - isa<VectorType>(I->getInitializer()->getType())) { + if (I->getInitializer()->getType()->isStructTy() || + I->getInitializer()->getType()->isVectorTy()) { Out << "{ 0 }"; - } else if (isa<ArrayType>(I->getInitializer()->getType())) { + } else if (I->getInitializer()->getType()->isArrayTy()) { // As with structs and vectors, but with an extra set of braces // because arrays are wrapped in structs. Out << "{ { 0 } }"; @@ -2274,7 +2111,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // Out << "/* Structure contents */\n"; for (I = TST.begin(); I != End; ++I) - if (isa<StructType>(I->second) || isa<ArrayType>(I->second)) + if (I->second->isStructTy() || I->second->isArrayTy()) // Only print out used types! printContainedStructs(I->second, StructPrinted); } @@ -2287,14 +2124,15 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { void CWriter::printContainedStructs(const Type *Ty, std::set<const Type*> &StructPrinted) { // Don't walk through pointers. - if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return; + if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) + return; // Print all contained types first. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { + if (Ty->isStructTy() || Ty->isArrayTy()) { // Check to see if we have already printed this struct. if (StructPrinted.insert(Ty).second) { // Print structure type out. @@ -2327,7 +2165,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); // Print out the name... FunctionInnards << GetValueName(F) << '('; @@ -2382,7 +2221,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { if (PrintedArg) FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -2423,8 +2262,8 @@ static inline bool isFPIntBitCast(const Instruction &I) { return false; const Type *SrcTy = I.getOperand(0)->getType(); const Type *DstTy = I.getType(); - return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || - (DstTy->isFloatingPoint() && SrcTy->isInteger()); + return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { @@ -2713,7 +2552,7 @@ void CWriter::visitPHINode(PHINode &I) { void CWriter::visitBinaryOperator(Instruction &I) { // binary instructions, shift instructions, setCond instructions. - assert(!isa<PointerType>(I.getType())); + assert(!I.getType()->isPointerTy()); // We must cast the results of binary operations which might be promoted. bool needsCast = false; @@ -3489,7 +3328,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. if (isAddressExposed(Ptr)) { writeOperandInternal(Ptr, Static); - } else if (I != E && isa<StructType>(*I)) { + } else if (I != E && (*I)->isStructTy()) { // If we didn't already emit the first operand, see if we can print it as // P->f instead of "P[0].f" writeOperand(Ptr); @@ -3504,13 +3343,13 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, } for (; I != E; ++I) { - if (isa<StructType>(*I)) { + if ((*I)->isStructTy()) { Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if (isa<ArrayType>(*I)) { + } else if ((*I)->isArrayTy()) { Out << ".array["; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; - } else if (!isa<VectorType>(*I)) { + } else if (!(*I)->isVectorTy()) { Out << '['; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; @@ -3668,7 +3507,7 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3689,7 +3528,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3705,7 +3544,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 715bbda..d178e7f 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -27,7 +27,8 @@ struct CTargetMachine : public TargetMachine { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 06eb149..47cb579 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -123,8 +123,8 @@ multiclass CompareLogicalGreaterThan64 { defm I64LGT: CompareLogicalGreaterThan64; def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; -def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64LGTv2i64.Fragment>; +//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64LGTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setule, I64LGTr64>; @@ -201,8 +201,8 @@ multiclass CompareGreaterThan64 { defm I64GT: CompareLogicalGreaterThan64; def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; -def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64GTv2i64.Fragment>; +//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64GTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setle, I64GTr64>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 80693e1..396a921 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -294,15 +294,19 @@ namespace { ((vecVT == MVT::v2i64) && ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(bvNode); + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { + HandleSDNode Dummy(SDValue(bvNode, 0)); + if (SDNode *N = Select(bvNode)) + return N; + return Dummy.getValue().getNode(); + } // No, need to emit a constant pool spill: std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); - CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } Constant *CP = ConstantVector::get(CV); @@ -311,10 +315,15 @@ namespace { SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(vecVT, dl, - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment).getNode()); + + HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(),0, + false, false, Alignment)); + CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } /// Select - Convert the specified operand from a target-independent to a @@ -390,10 +399,6 @@ namespace { return false; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } @@ -411,16 +416,6 @@ namespace { }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void -SPUDAGToDAGISel::InstructionSelect() -{ - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /*! \arg Op The ISD instruction operand \arg N The address to be tested @@ -692,9 +687,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. - } if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -759,43 +753,67 @@ SPUDAGToDAGISel::Select(SDNode *N) { } SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - SDNode *PromoteScalar = - SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, - Op0VecVT, Op0).getNode()); - + + HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0)); + + SDValue PromScalar; + if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) + PromScalar = SDValue(N, 0); + else + PromScalar = PromoteScalar.getValue(); + SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - SDValue(PromoteScalar, 0), - SDValue(PromoteScalar, 0), + PromScalar, PromScalar, SDValue(shufMaskLoad, 0)); - // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we - // re-use it in the VEC2PREFSLOT selection without needing to explicitly - // call SelectCode (it's already done for us.) - SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle).getNode()); + HandleSDNode Dummy2(zextShuffle); + if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) + zextShuffle = SDValue(N, 0); + else + zextShuffle = Dummy2.getValue(); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, + zextShuffle)); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + SelectCode(Dummy.getValue().getNode()); + return Dummy.getValue().getNode(); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::TRUNCATE) { SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) @@ -832,17 +850,14 @@ SPUDAGToDAGISel::Select(SDNode *N) { } } } else if (Opc == ISD::SHL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSHLi64(N, OpVT); - } } else if (Opc == ISD::SRL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRLi64(N, OpVT); - } } else if (Opc == ISD::SRA) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRAi64(N, OpVT); - } } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { DebugLoc dl = N->getDebugLoc(); @@ -1224,13 +1239,15 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ? shufmask.getNode() : emitBuildVector(shufmask.getNode())); - SDNode *shufNode = - Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, + SDValue shufNode = + CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0)).getNode()); - - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(shufNode, 0)); + SDValue(shufMaskNode, 0)); + HandleSDNode Dummy(shufNode); + SDNode *SN = SelectCode(Dummy.getValue().getNode()); + if (SN == 0) SN = Dummy.getValue().getNode(); + + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index fe0f019..e863ee3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" @@ -118,8 +118,7 @@ namespace { TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc(), - DAG.GetOrdering(InChain.getNode())); + Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; } @@ -669,7 +668,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); + LN->isVolatile(), LN->isNonTemporal(), 16); // Update the chain the_chain = result.getValue(1); @@ -820,7 +819,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); + SN->isVolatile(), SN->isNonTemporal(), 16); // Update the chain the_chain = alignLoadVec.getValue(1); @@ -861,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); + LN->isVolatile(), LN->isNonTemporal(), + LN->getAlignment()); #if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { @@ -1086,7 +1086,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } @@ -1108,7 +1108,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1190,7 +1191,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1199,7 +1201,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1212,7 +1215,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 5ef3c6b..3e17a51 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 3dd8ca7..9c5893c 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -221,7 +221,7 @@ namespace { APFloat APF = APFloat(CFP->getValueAPF()); // copy if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(getGlobalContext(), "; + Out << "ConstantFP::get(mod->getContext(), "; Out << "APFloat("; #if HAVE_PRINTF_A char Buffer[100]; @@ -344,23 +344,23 @@ namespace { std::string CppWriter::getCppName(const Type* Ty) { // First, handle the primitive types .. easy - if (Ty->isPrimitiveType() || Ty->isInteger()) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; + case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); - return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")"; + return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; } - case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())"; - case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())"; - case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())"; - case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())"; + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; + case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; + case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; default: error("Invalid primitive type"); break; } // shouldn't be returned, but make it sensible - return "Type::getVoidTy(getGlobalContext())"; + return "Type::getVoidTy(mod->getContext())"; } // Now, see if we've seen the type before and return that @@ -493,7 +493,7 @@ namespace { bool CppWriter::printTypeInternal(const Type* Ty) { // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return false; // If we already defined this type, we don't need to define it again. @@ -514,7 +514,7 @@ namespace { TypeMap::const_iterator I = UnresolvedTypes.find(Ty); if (I == UnresolvedTypes.end()) { Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(getGlobalContext());"; + Out << "_fwd = OpaqueType::get(mod->getContext());"; nl(Out); UnresolvedTypes[Ty] = typeName; } @@ -615,7 +615,7 @@ namespace { } case Type::OpaqueTyID: { Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(getGlobalContext());"; + Out << " = OpaqueType::get(mod->getContext());"; nl(Out); break; } @@ -686,7 +686,7 @@ namespace { // For primitive types and types already defined, just add a name TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isInteger() || TI->second->isPrimitiveType() || + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || TNI != TypeNames.end()) { Out << "mod->addTypeName(\""; printEscapedString(TI->first); @@ -751,7 +751,7 @@ namespace { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { std::string constValue = CI->getValue().toString(10, true); Out << "ConstantInt* " << constName - << " = ConstantInt::get(getGlobalContext(), APInt(" + << " = ConstantInt::get(mod->getContext(), APInt(" << cast<IntegerType>(CI->getType())->getBitWidth() << ", StringRef(\"" << constValue << "\"), 10));"; } else if (isa<ConstantAggregateZero>(CV)) { @@ -769,7 +769,7 @@ namespace { CA->getType()->getElementType() == Type::getInt8Ty(CA->getContext())) { Out << "Constant* " << constName << - " = ConstantArray::get(getGlobalContext(), \""; + " = ConstantArray::get(mod->getContext(), \""; std::string tmp = CA->getAsString(); bool nullTerminate = false; if (tmp[tmp.length()-1] == 0) { @@ -995,7 +995,7 @@ namespace { void CppWriter::printVariableHead(const GlobalVariable *GV) { nl(Out) << "GlobalVariable* " << getCppName(GV); if (is_inline) { - Out << " = mod->getGlobalVariable(getGlobalContext(), "; + Out << " = mod->getGlobalVariable(mod->getContext(), "; printEscapedString(GV->getName()); Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; nl(Out) << "if (!" << getCppName(GV) << ") {"; @@ -1094,7 +1094,7 @@ namespace { case Instruction::Ret: { const ReturnInst* ret = cast<ReturnInst>(I); - Out << "ReturnInst::Create(getGlobalContext(), " + Out << "ReturnInst::Create(mod->getContext(), " << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");"; break; } @@ -1171,7 +1171,7 @@ namespace { } case Instruction::Unreachable: { Out << "new UnreachableInst(" - << "getGlobalContext(), " + << "mod->getContext(), " << bbname << ");"; break; } @@ -1673,7 +1673,7 @@ namespace { BI != BE; ++BI) { std::string bbname(getCppName(BI)); Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(getGlobalContext(), \""; + " = BasicBlock::Create(mod->getContext(), \""; if (BI->hasName()) printEscapedString(BI->getName()); Out << "\"," << getCppName(BI->getParent()) << ",0);"; @@ -2009,7 +2009,8 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 1f74f76..b7aae91 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -30,7 +30,8 @@ struct CPPTargetMachine : public TargetMachine { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..cfb2fc8 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeAsmPrinter.cpp + )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp new file mode 100644 index 0000000..6fe1026 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -0,0 +1,302 @@ +//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format MBlaze assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-asm-printer" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MathExtras.h" +#include <cctype> + +using namespace llvm; + +namespace { + class MBlazeAsmPrinter : public AsmPrinter { + const MBlazeSubtarget *Subtarget; + public: + explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T ) + : AsmPrinter(O, TM, Ctx, Streamer, T) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + } + + virtual const char *getPassName() const { + return "MBlaze Assembly Printer"; + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void printOperand(const MachineInstr *MI, int opNum); + void printUnsignedImm(const MachineInstr *MI, int opNum); + void printFSLImm(const MachineInstr *MI, int opNum); + void printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printFCCOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printSavedRegsBitmask(); + void printHex32(unsigned int Value); + + const char *emitCurrentABIString(); + void emitFrameDirective(); + + void printInstruction(const MachineInstr *MI); // autogenerated. + void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + O << '\n'; + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + static const char *getRegisterName(unsigned RegNo); + + virtual void EmitFunctionEntryLabel(); + void EmitStartOfAsmFile(Module &M); + }; +} // end of anonymous namespace + +#include "MBlazeGenAsmWriter.inc" + +//===----------------------------------------------------------------------===// +// +// MBlaze Asm Directives +// +// -- Frame directive "frame Stackpointer, Stacksize, RARegister" +// Describe the stack frame. +// +// -- Mask directives "mask bitmask, offset" +// Tells the assembler which registers are saved and where. +// bitmask - contain a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the +// assembler knows the register 31 (RA) is saved at prologue. +// offset - the position before stack pointer subtraction indicating where +// the first saved register on prologue is located. (e.g. with a +// +// Consider the following function prologue: +// +// .frame R19,48,R15 +// .mask 0xc0000000,-8 +// addiu R1, R1, -48 +// sw R15, 40(R1) +// sw R19, 36(R1) +// +// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and +// 19 (R19) are saved at prologue. As the save order on prologue is from +// left to right, R15 is saved first. A -8 offset means that after the +// stack pointer subtration, the first register in the mask (R15) will be +// saved at address 48-8=40. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mask directives +//===----------------------------------------------------------------------===// + +// Create a bitmask with all callee saved registers for CPU or Floating Point +// registers. For CPU registers consider RA, GP and FP for saving if necessary. +void MBlazeAsmPrinter::printSavedRegsBitmask() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + + // CPU Saved Registers Bitmasks + unsigned int CPUBitmask = 0; + + // Set the CPU Bitmasks + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg()); + if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass) + CPUBitmask |= (1 << RegNum); + } + + // Return Address and Frame registers must also be set in CPUBitmask. + if (RI.hasFP(*MF)) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getFrameRegister(*MF))); + + if (MFI->hasCalls()) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getRARegister())); + + // Print CPUBitmask + O << "\t.mask \t"; printHex32(CPUBitmask); O << ',' + << MBlazeFI->getCPUTopSavedRegOff() << '\n'; +} + +// Print a 32 bit hex number with all numbers. +void MBlazeAsmPrinter::printHex32(unsigned int Value) { + O << "0x"; + for (int i = 7; i >= 0; i--) + O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) ); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// + +/// Frame Directive +void MBlazeAsmPrinter::emitFrameDirective() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + unsigned stackReg = RI.getFrameRegister(*MF); + unsigned returnReg = RI.getRARegister(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); + + + O << "\t.frame\t" << getRegisterName(stackReg) + << ',' << stackSize << ',' + << getRegisterName(returnReg) + << '\n'; +} + +void MBlazeAsmPrinter::EmitFunctionEntryLabel() { + O << "\t.ent\t" << *CurrentFnSym << '\n'; + OutStreamer.EmitLabel(CurrentFnSym); +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + printSavedRegsBitmask(); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyEnd() { + O << "\t.end\t" << *CurrentFnSym << '\n'; +} + +// Print out an operand for an inline asm expression. +bool MBlazeAsmPrinter:: +PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo); + return false; +} + +void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + + case MachineOperand::MO_FPImmediate: { + const ConstantFP* fp = MO.getFPImm(); + printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue()); + O << ";\t# immediate = " << *fp; + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(OutContext); + return; + + case MachineOperand::MO_GlobalAddress: + O << *GetGlobalValueSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); + } +} + +void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << "rfsl" << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + printOperand(MI, opNum+1); + O << ", "; + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + const MachineOperand& MO = MI->getOperand(opNum); + O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm()); +} + +void MBlazeAsmPrinter::EmitStartOfAsmFile(Module &M) { +} + +// Force static initialization. +extern "C" void LLVMInitializeMBlazeAsmPrinter() { + RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); +} diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile new file mode 100644 index 0000000..c8e4d8f --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeAsmPrinter + +# Hack: we need to include 'main' MBlaze target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt new file mode 100644 index 0000000..c93e3df --- /dev/null +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS MBlaze.td) + +tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) +tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) +tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) +tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) +tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) +tablegen(MBlazeGenCallingConv.inc -gen-callingconv) +tablegen(MBlazeGenSubtarget.inc -gen-subtarget) +tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) + +add_llvm_target(MBlazeCodeGen + MBlazeDelaySlotFiller.cpp + MBlazeInstrInfo.cpp + MBlazeISelDAGToDAG.cpp + MBlazeISelLowering.cpp + MBlazeMCAsmInfo.cpp + MBlazeRegisterInfo.cpp + MBlazeSubtarget.cpp + MBlazeTargetMachine.cpp + MBlazeTargetObjectFile.cpp + MBlazeIntrinsicInfo.cpp + ) + +target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h new file mode 100644 index 0000000..f9d828b --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.h @@ -0,0 +1,39 @@ +//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM MBlaze back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MBLAZE_H +#define TARGET_MBLAZE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class MBlazeTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class formatted_raw_ostream; + + FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); + FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); + + extern Target TheMBlazeTarget; +} // end namespace llvm; + +// Defines symbolic names for MBlaze registers. This defines a mapping from +// register name to register number. +#include "MBlazeGenRegisterNames.inc" + +// Defines symbolic names for the MBlaze instructions. +#include "MBlazeGenInstrNames.inc" + +#endif diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td new file mode 100644 index 0000000..1679752 --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.td @@ -0,0 +1,85 @@ +//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the MBlaze target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "MBlazeRegisterInfo.td" +include "MBlazeSchedule.td" +include "MBlazeIntrinsics.td" +include "MBlazeInstrInfo.td" +include "MBlazeCallingConv.td" + +def MBlazeInstrInfo : InstrInfo { + let TSFlagsFields = []; + let TSFlagsShifts = []; +} + + +//===----------------------------------------------------------------------===// +// Microblaze Subtarget features // +//===----------------------------------------------------------------------===// + +def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true", + "Implements 3-stage pipeline.">; +def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true", + "Implements barrel shifter.">; +def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true", + "Implements hardware divider.">; +def FeatureMul : SubtargetFeature<"mul", "HasMul", "true", + "Implements hardware multiplier.">; +def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true", + "Implements FSL instructions.">; +def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true", + "Implements extended FSL instructions.">; +def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true", + "Implements MSR register set and clear.">; +def FeatureException : SubtargetFeature<"exception", "HasException", "true", + "Implements hardware exception support.">; +def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true", + "Implements pattern compare instruction.">; +def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true", + "Implements floating point unit.">; +def FeatureESR : SubtargetFeature<"esr", "HasESR", "true", + "Implements ESR and EAR registers">; +def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true", + "Implements processor version register.">; +def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true", + "Implements multiplier with 64-bit result">; +def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", + "Implements sqrt and floating point convert.">; +def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true", + "Implements memory management unit.">; + +//===----------------------------------------------------------------------===// +// MBlaze processors supported. +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, MBlazeGenericItineraries, Features>; + + +def : Proc<"v400", []>; +def : Proc<"v500", []>; +def : Proc<"v600", []>; +def : Proc<"v700", []>; +def : Proc<"v710", []>; + +def MBlaze : Target { + let InstructionSet = MBlazeInstrInfo; +} diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td new file mode 100644 index 0000000..ddd4998 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -0,0 +1,26 @@ +//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for MBlaze architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A>: + CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// MBlaze ABI Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MBlaze : CallingConv<[ + // i32 are returned in registers R3, R4 + CCIfType<[i32], CCAssignToReg<[R3, R4]>>, + + // f32 are returned in registers F3, F4 + CCIfType<[f32], CCAssignToReg<[F3, F4]>> +]>; diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp new file mode 100644 index 0000000..42fea25 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -0,0 +1,75 @@ +//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple pass to fills delay slots with NOPs. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "delay-slot-filler" + +#include "MBlaze.h" +#include "MBlazeTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(FilledSlots, "Number of delay slots filled"); + +namespace { + struct Filler : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + Filler(TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "MBlaze Delay Slot Filler"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) { + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; + } + + }; + char Filler::ID = 0; +} // end of anonymous namespace + +/// runOnMachineBasicBlock - Fill in delay slots for the given basic block. +/// Currently, we fill delay slots with NOPs. We assume there is only one +/// delay slot per delayed instruction. +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator J = I; + ++J; + BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP)); + ++FilledSlots; + Changed = true; + } + return Changed; +} + +/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay +/// slots in MBlaze MachineFunctions +FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) { + return new Filler(tm); +} + diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp new file mode 100644 index 0000000..7e59c4a --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -0,0 +1,339 @@ +//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MBlaze target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-isel" +#include "MBlaze.h" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeSubtarget.h" +#include "MBlazeTargetMachine.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/CFG.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace { + +class MBlazeDAGToDAGISel : public SelectionDAGISel { + + /// TM - Keep a reference to MBlazeTargetMachine. + MBlazeTargetMachine &TM; + + /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can + /// make the right decision when generating code for different targets. + const MBlazeSubtarget &Subtarget; + +public: + explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MBlaze DAG->DAG Pattern Instruction Selection"; + } +private: + // Include the pieces autogenerated from the target description. + #include "MBlazeGenDAGISel.inc" + + /// getTargetMachine - Return a reference to the TargetMachine, casted + /// to the target-specific type. + const MBlazeTargetMachine &getTargetMachine() { + return static_cast<const MBlazeTargetMachine &>(TM); + } + + /// getInstrInfo - Return a reference to the TargetInstrInfo, casted + /// to the target-specific type. + const MBlazeInstrInfo *getInstrInfo() { + return getTargetMachine().getInstrInfo(); + } + + SDNode *getGlobalBaseReg(); + SDNode *Select(SDNode *N); + + // Complex Pattern. + bool SelectAddr(SDNode *Op, SDValue N, + SDValue &Base, SDValue &Offset); + + // Address Selection + bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); + bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); + + // getI32Imm - Return a target constant with the specified value, of type i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } +}; + +} + +/// isIntS32Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 32-bit value. If so, this returns true and the +/// immediate. +static bool isIntS32Immediate(SDNode *N, int32_t &Imm) { + unsigned Opc = N->getOpcode(); + if (Opc != ISD::Constant) + return false; + + Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS32Immediate(SDValue Op, int32_t &Imm) { + return isIntS32Immediate(Op.getNode(), Imm); +} + + +/// SelectAddressRegReg - Given the specified addressed, check to see if it +/// can be represented as an indexed [r+r] operation. Returns false if it +/// can be more efficiently represented with [r+imm]. +bool MBlazeDAGToDAGISel:: +SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { + if (N.getOpcode() == ISD::FrameIndex) return false; + if (N.getOpcode() == ISD::TargetExternalSymbol || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. + + int32_t imm = 0; + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + if (isIntS32Immediate(N.getOperand(1), imm)) + return false; // r+i + + if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable || + N.getOperand(1).getOpcode() == ISD::TargetJumpTable) + return false; // jump tables. + + Base = N.getOperand(1); + Index = N.getOperand(0); + return true; + } + + return false; +} + +/// Returns true if the address N can be represented by a base register plus +/// a signed 32-bit displacement [r+imm], and if it is not better +/// represented as reg+reg. +bool MBlazeDAGToDAGISel:: +SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { + // If this can be more profitably realized as r+r, fail. + if (SelectAddrRegReg(Op, N, Disp, Base)) + return false; + + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + int32_t imm = 0; + if (isIntS32Immediate(N.getOperand(1), imm)) { + Disp = CurDAG->getTargetConstant(imm, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + DEBUG( errs() << "WESLEY: Using Operand Immediate\n" ); + return true; // [r+i] + } + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { + // Loading from a constant address. + uint32_t Imm = CN->getZExtValue(); + Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0)); + Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0)); + DEBUG( errs() << "WESLEY: Using Constant Node\n" ); + return true; + } + + Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N; + return true; // [r+0] +} + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); +} + +/// ComplexPattern used on MBlazeInstrInfo +/// Used on MBlaze Load/Store instructions +bool MBlazeDAGToDAGISel:: +SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // on PIC code Load GA + if (TM.getRelocationModel() == Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || + (Addr.getOpcode() == ISD::TargetJumpTable)){ + Base = CurDAG->getRegister(MBlaze::R15, MVT::i32); + Offset = Addr; + return true; + } + } else { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { + if (Predicate_immSExt16(CN)) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + } else { + Base = Addr.getOperand(0); + } + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + return true; + } + } + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc dl = Node->getDebugLoc(); + + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + return NULL; + } + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + switch(Opcode) { + default: break; + + // Get target GOT address. + case ISD::GLOBAL_OFFSET_TABLE: + return getGlobalBaseReg(); + + case ISD::FrameIndex: { + SDValue imm = CurDAG->getTargetConstant(0, MVT::i32); + int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned Opc = MBlaze::ADDI; + if (Node->hasOneUse()) + return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm); + return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm); + } + + + /// Handle direct and indirect calls when using PIC. On PIC, when + /// GOT is smaller than about 64k (small code) the GA target is + /// loaded with only one instruction. Otherwise GA's target must + /// be loaded with 3 instructions. + case MBlazeISD::JmpLink: { + if (TM.getRelocationModel() == Reloc::PIC_) { + SDValue Chain = Node->getOperand(0); + SDValue Callee = Node->getOperand(1); + SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32); + SDValue InFlag(0, 0); + + if ( (isa<GlobalAddressSDNode>(Callee)) || + (isa<ExternalSymbolSDNode>(Callee)) ) + { + /// Direct call for global addresses and external symbols + SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32); + + // Use load to get GOT target + SDValue Ops[] = { Callee, GPReg, Chain }; + SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl, + MVT::i32, MVT::Other, Ops, 3), 0); + Chain = Load.getValue(1); + + // Call target must be on T9 + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag); + } else + /// Indirect call + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag); + + // Emit Jump and Link Register + SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other, + MVT::Flag, R20Reg, Chain); + Chain = SDValue(ResNode, 0); + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(Node, 0), Chain); + ReplaceUses(SDValue(Node, 1), InFlag); + return ResNode; + } + } + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); + return ResNode; +} + +/// createMBlazeISelDag - This pass converts a legalized DAG into a +/// MBlaze-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) { + return new MBlazeDAGToDAGISel(TM); +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp new file mode 100644 index 0000000..f0864d0 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -0,0 +1,975 @@ +//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-lower" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink"; + case MBlazeISD::GPRel : return "MBlazeISD::GPRel"; + case MBlazeISD::Wrap : return "MBlazeISD::Wrap"; + case MBlazeISD::ICmp : return "MBlazeISD::ICmp"; + case MBlazeISD::Ret : return "MBlazeISD::Ret"; + case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC"; + default : return NULL; + } +} + +MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) + : TargetLowering(TM, new MBlazeTargetObjectFile()) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + + // MBlaze does not have i1 type, so use i32 for + // setcc operations results (slt, sgt, ...). + setBooleanContents(ZeroOrOneBooleanContent); + + // Set up the register classes + addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass); + if (Subtarget->hasFPU()) { + addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + } + + // Floating point operations which are not supported + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); + + // Load extented operations for i1 types must be promoted + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // MBlaze has no REM or DIVREM operations. + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // If the processor doesn't support multiply then expand it + if (!Subtarget->hasMul()) { + setOperationAction(ISD::MUL, MVT::i32, Expand); + } + + // If the processor doesn't support 64-bit multiply then expand + if (!Subtarget->hasMul() || !Subtarget->hasMul64()) { + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + } + + // If the processor doesn't support division then expand + if (!Subtarget->hasDiv()) { + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + } + + // Expand unsupported conversions + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + + // Expand SELECT_CC + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // MBlaze doesn't have MUL_LOHI + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + + // Used by legalize types to correctly generate the setcc result. + // Without this, every float setcc comes with a AND/OR with the result, + // we don't want this, since the fpcmp result goes to a flag register, + // which is used implicitly by brcond and select operations. + AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32); + + // MBlaze Custom Operations + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + + // Variable Argument support + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + + + // Operations not directly supported by MBlaze. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // We don't have line number support yet. + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // Use the default for now + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + // MBlaze doesn't have extending float->double load/store + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + setStackPointerRegisterToSaveRestore(MBlaze::R1); + computeRegisterProperties(); +} + +MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i32; +} + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { + return 2; +} + +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) + { + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Lower helper functions +//===----------------------------------------------------------------------===// +MachineBasicBlock* MBlazeTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, + MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case MBlaze::ShiftRL: + case MBlaze::ShiftRA: + case MBlaze::ShiftL: { + // To "insert" a shift left instruction, we actually have to insert a + // simple loop. The incoming instruction knows the destination vreg to + // set, the source vreg to operate over and the shift amount. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // start: + // andi samt, samt, 31 + // beqid samt, finish + // add dst, src, r0 + // loop: + // addik samt, samt, -1 + // sra dst, dst + // bneid samt, loop + // nop + // finish: + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &R = F->getRegInfo(); + MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT) + .addReg(MI->getOperand(2).getReg()) + .addImm(31); + + unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL) + .addReg(MI->getOperand(1).getReg()) + .addImm(0); + + BuildMI(BB, dl, TII->get(MBlaze::BEQID)) + .addReg(IAMT) + .addMBB(finish); + + F->insert(It, loop); + F->insert(It, finish); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, finish)); + finish->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(loop); + BB->addSuccessor(finish); + + // Next, add the finish block as a successor of the loop block + loop->addSuccessor(finish); + loop->addSuccessor(loop); + + unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT) + .addReg(IAMT).addMBB(BB) + .addReg(NAMT).addMBB(loop); + + if (MI->getOpcode() == MBlaze::ShiftL) + BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRA) + BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRL) + BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST); + else + llvm_unreachable( "Cannot lower unknown shift instruction" ); + + BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT) + .addReg(SAMT) + .addImm(-1); + + BuildMI(loop, dl, TII->get(MBlaze::BNEID)) + .addReg(NAMT) + .addMBB(loop); + + BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + // The pseudo instruction is no longer needed so remove it + F->DeleteMachineInstr(MI); + return finish; + } + + case MBlaze::Select_FCC: + case MBlaze::Select_CC: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineFunction *F = BB->getParent(); + MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned Opc; + switch (MI->getOperand(4).getImm()) { + default: llvm_unreachable( "Unknown branch condition" ); + case MBlazeCC::EQ: Opc = MBlaze::BNEID; break; + case MBlazeCC::NE: Opc = MBlaze::BEQID; break; + case MBlazeCC::GT: Opc = MBlaze::BLEID; break; + case MBlazeCC::LT: Opc = MBlaze::BGEID; break; + case MBlazeCC::GE: Opc = MBlaze::BLTID; break; + case MBlazeCC::LE: Opc = MBlaze::BGTID; break; + } + + BuildMI(BB, dl, TII->get(Opc)) + .addReg(MI->getOperand(3).getReg()) + .addMBB(dneBB); + + F->insert(It, flsBB); + F->insert(It, dneBB); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, dneBB)); + dneBB->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(flsBB); + BB->addSuccessor(dneBB); + flsBB->addSuccessor(dneBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB) + // .addReg(MI->getOperand(2).getReg()).addMBB(BB); + + BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()).addMBB(flsBB) + .addReg(MI->getOperand(1).getReg()).addMBB(BB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return dneBB; + } + } +} + +//===----------------------------------------------------------------------===// +// Misc Lower Operation implementation +//===----------------------------------------------------------------------===// +// + +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + SDValue CompareFlag; + if (LHS.getValueType() == MVT::i32) { + Opc = MBlazeISD::Select_CC; + CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS) + .getValue(1); + } else { + llvm_unreachable( "Cannot lower select_cc with unknown type" ); + } + + return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal, + CompareFlag); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + llvm_unreachable("TLS not implemented for MicroBlaze."); + return SDValue(); // Not reached +} + +SDValue MBlazeTargetLowering:: +LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + SDValue HiPart; + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO; + + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI); + //return JTI; +} + +SDValue MBlazeTargetLowering:: +LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + Constant *C = N->getConstVal(); + SDValue Zero = DAG.getConstant(0, PtrVT); + DebugLoc dl = Op.getDebugLoc(); + + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MBlazeII::MO_ABS_HILO); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); +} + +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "MBlazeGenCallingConv.inc" + +static bool CC_MBlaze2(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const unsigned RegsSize=6; + static const unsigned IntRegs[] = { + MBlaze::R5, MBlaze::R6, MBlaze::R7, + MBlaze::R8, MBlaze::R9, MBlaze::R10 + }; + + static const unsigned FltRegs[] = { + MBlaze::F5, MBlaze::F6, MBlaze::F7, + MBlaze::F8, MBlaze::F9, MBlaze::F10 + }; + + unsigned Reg=0; + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (ValVT == MVT::i32) { + Reg = State.AllocateReg(IntRegs, RegsSize); + LocVT = MVT::i32; + } else if (ValVT == MVT::f32) { + Reg = State.AllocateReg(FltRegs, RegsSize); + LocVT = MVT::f32; + } + + if (!Reg) { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } + + return false; // CC must always match +} + +//===----------------------------------------------------------------------===// +// Call Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerCall - functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +/// TODO: isVarArg, isTailCall. +SDValue MBlazeTargetLowering:: +LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + // MBlaze does not yet support tail call optimization + isTailCall = false; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // First/LastArgStackLoc contains the first/last + // "at stack" argument location. + int LastArgStackLoc = 0; + unsigned FirstStackArgLoc = 0; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = Outs[i].Val; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); + break; + } + + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Register can't get to this point... + assert(VA.isMemLoc()); + + // Create the frame index object for this incoming parameter + LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + LastArgStackLoc, true, false); + + SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); + + // emit ISD::STORE whichs stores the + // parameter value to a stack Location + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); + } + } + + // Transform all store nodes into one single node because all store + // nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emited instructions must be + // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + unsigned char OpFlag = MBlazeII::MO_NO_FLAG; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + getPointerTy(), 0, OpFlag); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + getPointerTy(), OpFlag); + + // MBlazeJmpLink = #chain, #target_address, #opt_in_flags... + // = Chain, Callee, Reg#1, Reg#2, ... + // + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); +} + +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue MBlazeTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Formal Arguments Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerFormalArguments - transform physical registers into +/// virtual registers and generate load operations for +/// arguments places on the stack. +SDValue MBlazeTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + + unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); + VarArgsFrameIndex = 0; + + // Used with vargs to acumulate store chains. + std::vector<SDValue> OutChains; + + // Keep track of the last register used for arguments + unsigned ArgRegEnd = 0; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2); + SDValue StackPtr; + + unsigned FirstStackArgLoc = 0; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored on registers + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + ArgRegEnd = VA.getLocReg(); + TargetRegisterClass *RC = 0; + + if (RegVT == MVT::i32) + RC = MBlaze::CPURegsRegisterClass; + else if (RegVT == MVT::f32) + RC = MBlaze::FGR32RegisterClass; + else + llvm_unreachable("RegVT not supported by LowerFormalArguments"); + + // Transform the arguments stored on + // physical registers into virtual ones + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this is an 8 or 16-bit value, it has been passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. If if is a floating point value + // then convert to the correct type. + if (VA.getLocInfo() != CCValAssign::Full) { + unsigned Opcode = 0; + if (VA.getLocInfo() == CCValAssign::SExt) + Opcode = ISD::AssertSext; + else if (VA.getLocInfo() == CCValAssign::ZExt) + Opcode = ISD::AssertZext; + if (Opcode) + ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + } + + InVals.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + + // The last argument is not a register + ArgRegEnd = 0; + + // The stack pointer offset is relative to the caller stack frame. + // Since the real stack size is unknown here, a negative SPOffset + // is used so there's a way to adjust these offsets when the stack + // size get known (on EliminateFrameIndex). A dummy SPOffset is + // used instead of a direct negative address (which is recorded to + // be used on emitPrologue) to avoid mis-calc of the first stack + // offset on PEI::calculateFrameObjectOffsets. + // Arguments are always 32-bit. + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ + (FirstStackArgLoc + VA.getLocMemOffset()))); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); + } + } + + // To meet ABI, when VARARGS are passed on registers, the registers + // must have their values written to the caller stack frame. If the last + // argument was placed in the stack, there's no need to save any register. + if ((isVarArg) && ArgRegEnd) { + if (StackPtr.getNode() == 0) + StackPtr = DAG.getRegister(StackReg, getPointerTy()); + + // The last register argument that must be saved is MBlaze::R10 + TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass; + + unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5); + unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1); + unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10); + unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin); + + for (; Start <= End; ++Start, ++StackLoc) { + unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); + unsigned LiveReg = MF.addLiveIn(Reg, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); + + int FI = MFI->CreateFixedObject(4, 0, true, false); + MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + if (!VarArgsFrameIndex) + VarArgsFrameIndex = FI; + } + } + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens when on varg functions + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention Implementation +//===----------------------------------------------------------------------===// + +SDValue MBlazeTargetLowering:: +LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + + // Analize return values. + CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Outs[i].Val, Flag); + + // guarantee that all emitted copies are + // stuck together, avoiding something bad + Flag = Chain.getValue(1); + } + + // Return on MBlaze is always a "rtsd R15, 8" + if (Flag.getNode()) + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag); + else // Return Void + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32)); +} + +//===----------------------------------------------------------------------===// +// MBlaze Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +MBlazeTargetLowering::ConstraintType MBlazeTargetLowering:: +getConstraintType(const std::string &Constraint) const +{ + // MBlaze specific constrainy + // + // 'd' : An address register. Equivalent to r. + // 'y' : Equivalent to r; retained for + // backwards compatibility. + // 'f' : Floating Point registers. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default : break; + case 'd': + case 'y': + case 'f': + return C_RegisterClass; + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), +/// return a list of registers that can be used to satisfy the constraint. +/// This should only be used for C_RegisterClass constraints. +std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, MBlaze::CPURegsRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, MBlaze::FGR32RegisterClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::vector<unsigned> MBlazeTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() != 1) + return std::vector<unsigned>(); + + switch (Constraint[0]) { + default : break; + case 'r': + // GCC MBlaze Constraint Letters + case 'd': + case 'y': + return make_vector<unsigned>( + MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6, + MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11, + MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21, + MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, + MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, + MBlaze::R30, MBlaze::R31, 0); + + case 'f': + return make_vector<unsigned>( + MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6, + MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11, + MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21, + MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25, + MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29, + MBlaze::F30, MBlaze::F31, 0); + } + return std::vector<unsigned>(); +} + +bool MBlazeTargetLowering:: +isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The MBlaze target isn't yet aware of offsets. + return false; +} + +bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + return VT != MVT::f32; +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h new file mode 100644 index 0000000..f8b1470 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -0,0 +1,149 @@ +//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef MBlazeISELLOWERING_H +#define MBlazeISELLOWERING_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "MBlaze.h" +#include "MBlazeSubtarget.h" + +namespace llvm { + namespace MBlazeCC { + enum CC { + FIRST = 0, + EQ, + NE, + GT, + LT, + GE, + LE + }; + } + + namespace MBlazeISD { + enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Jump and link (call) + JmpLink, + + // Handle gp_rel (small data/bss sections) relocation. + GPRel, + + // Select CC Pseudo Instruction + Select_CC, + + // Wrap up multiple types of instructions + Wrap, + + // Integer Compare + ICmp, + + // Return + Ret + }; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Implementation + //===--------------------------------------------------------------------===// + + class MBlazeTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + + public: + + explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - get the ISD::SETCC result ValueType + MVT::SimpleValueType getSetCCResultType(EVT VT) const; + + virtual unsigned getFunctionAlignment(const Function *F) const; + private: + // Subtarget Info + const MBlazeSubtarget *Subtarget; + + + // Lower Operand helpers + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + // Lower Operand specifics + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + + // Inline asm support + ConstraintType getConstraintType(const std::string &Constraint) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + }; +} + +#endif // MBlazeISELLOWERING_H diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td new file mode 100644 index 0000000..a48a8c9 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -0,0 +1,223 @@ +//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs FGR32:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>; + +class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>; + +class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class CmpFN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class ArithF2<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithIF<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithFI<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class LogicF<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +class LogicFI<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FPU Arithmetic Instructions +//===----------------------------------------------------------------------===// +let Predicates=[HasFPU] in { + def FOR : LogicF<0x28, "or ">; + def FORI : LogicFI<0x28, "ori ">; + def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; + def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; + def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; + def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; + + def LWF : LoadFM<0x32, "lw ", load>; + def LWFI : LoadFMI<0x32, "lwi ", load>; + + def SWF : StoreFM<0x32, "sw ", store>; + def SWFI : StoreFMI<0x32, "swi ", store>; +} + +let Predicates=[HasFPU,HasSqrt] in { + def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; + def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; + def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>; +} + +let isAsCheapAsAMove = 1 in { + def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>; + def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>; + def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>; + def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>; + def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>; + def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>; + def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>; +} + + +let usesCustomInserter = 1 in { + def Select_FCC : MBlazePseudo<(outs FGR32:$dst), + (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_FCC PSEUDO!", + []>; +} + +// Floating point conversions +let Predicates=[HasFPU] in { + def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>; + def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>; + def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>; +} + +// SET_CC operations +let Predicates=[HasFPU] in { + def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (XOR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_NE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 2)>; +} + +// SELECT operations +def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F), + (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>; + +//===----------------------------------------------------------------------===// +// Patterns for Floating Point Instructions +//===----------------------------------------------------------------------===// +def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>; diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td new file mode 100644 index 0000000..b59999e --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -0,0 +1,153 @@ +//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FSL Instruction Formats +//===----------------------------------------------------------------------===// +class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>; + +class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs CPURegs:$dst), (ins fslimm:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>; + +class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>; + +class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins CPURegs:$v, fslimm:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>; + +class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$b), + !strconcat(instr_asm, " $b"), + [(OpNode CPURegs:$b)], IIAlu>; + +class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins fslimm:$b), + !strconcat(instr_asm, " $b"), + [(OpNode immZExt4:$b)], IIAlu>; + +//===----------------------------------------------------------------------===// +// FSL Get Instructions +//===----------------------------------------------------------------------===// +def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>; +def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>; +def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>; +def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>; +def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>; +def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>; +def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>; +def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>; +def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>; +def NAGET : FSLGet<0x1B, "naget ", int_mblaze_fsl_naget>; +def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>; +def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>; +def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>; +def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>; +def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>; +def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>; +def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>; +def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>; +def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>; +def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>; +def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>; +def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>; +def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>; +def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>; +def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>; +def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>; +def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>; +def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>; +def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>; +def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>; +def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>; +def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Get Instructions +//===----------------------------------------------------------------------===// +def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>; +def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>; +def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>; +def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>; +def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>; +def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>; +def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>; +def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>; +def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>; +def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>; +def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>; +def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>; +def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>; +def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>; +def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>; +def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>; +def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>; +def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>; +def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>; +def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>; +def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>; +def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>; +def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>; +def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>; +def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>; +def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>; +def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>; +def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>; +def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd ", int_mblaze_fsl_tneget>; +def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>; +def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>; +def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Put Instructions +//===----------------------------------------------------------------------===// +def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>; +def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>; +def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>; +def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>; +def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>; +def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>; +def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>; +def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>; +def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>; +def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>; +def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>; +def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>; +def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>; +def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>; +def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>; +def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Put Instructions +//===----------------------------------------------------------------------===// +def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>; +def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>; +def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>; +def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>; +def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>; +def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>; +def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>; +def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>; +def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>; +def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>; +def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>; +def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>; +def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>; +def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>; +def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>; +def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>; diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td new file mode 100644 index 0000000..7d65543 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -0,0 +1,246 @@ +//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe MBlaze instructions format +// +// CPU INSTRUCTION FORMATS +// +// opcode - operation code. +// rd - dst reg. +// ra - first src. reg. +// rb - second src. reg. +// imm16 - 16-bit immediate value. +// +//===----------------------------------------------------------------------===// + +// Generic MBlaze Format +class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : Instruction +{ + field bits<32> Inst; + + let Namespace = "MBlaze"; + + bits<6> opcode; + + // Top 6 bits are the 'opcode' field + let Inst{0-5} = opcode; + + dag OutOperandList = outs; + dag InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + let Itinerary = itin; +} + +//===----------------------------------------------------------------------===// +// Pseudo instruction class +//===----------------------------------------------------------------------===// +class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: + MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>; + +//===----------------------------------------------------------------------===// +// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> +//===----------------------------------------------------------------------===// + +class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TAI<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TIMM<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-15} = 0; + let Inst{16-31} = imm16; +} + +class TADDR<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<26> addr; + + let opcode = op; + + let Inst{6-31} = addr; +} + +//===----------------------------------------------------------------------===// +// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|> +//===----------------------------------------------------------------------===// + +class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +//===----------------------------------------------------------------------===// +// Float instruction class in MBlaze : <|opcode|rd|ra|flags|> +//===----------------------------------------------------------------------===// + +class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = 0; + let Inst{21-31} = flags; +} + +//===----------------------------------------------------------------------===// +// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|> +//===----------------------------------------------------------------------===// + +class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TRET<bits<6> op, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0x10; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp new file mode 100644 index 0000000..a7e8eb7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -0,0 +1,222 @@ +//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "MBlazeGenInstrInfo.inc" + +using namespace llvm; + +MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm) + : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +static bool isZeroImm(const MachineOperand &op) { + return op.isImm() && op.getImm() == 0; +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +bool MBlazeInstrInfo:: +isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + // add $dst, $src, $zero || addu $dst, $zero, $src + // or $dst, $src, $zero || or $dst, $zero, $src + if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) { + if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + return true; + } else if (MI.getOperand(2).isReg() && + MI.getOperand(2).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + // addi $dst, $src, 0 + // ori $dst, $src, 0 + if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) { + if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MBlazeInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::LWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MBlazeInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::SWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// insertNoop - If data hazard condition is found insert the target nop +/// instruction. +void MBlazeInstrInfo:: +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(MBlaze::NOP)); +} + +bool MBlazeInstrInfo:: +copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg) + .addReg(SrcReg).addReg(MBlaze::R0); + return true; +} + +void MBlazeInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); +} + +void MBlazeInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg) + .addImm(0).addFrameIndex(FI); +} + +MachineInstr *MBlazeInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + MachineInstr *NewMI = NULL; + + switch (MI->getOpcode()) { + case MBlaze::OR: + case MBlaze::ADD: + if ((MI->getOperand(0).isReg()) && + (MI->getOperand(2).isReg()) && + (MI->getOperand(2).getReg() == MBlaze::R0) && + (MI->getOperand(1).isReg())) { + if (Ops[0] == 0) { // COPY -> STORE + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } + } + break; + } + + return NewMI; +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// +unsigned MBlazeInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + + // Can only insert uncond branches so far. + assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); + BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB); + return 1; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. +/// +unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); + bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, + MBlaze::CPURegsRegisterClass, + MBlaze::CPURegsRegisterClass); + assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. + RegInfo.addLiveIn(MBlaze::R20); + + MBlazeFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h new file mode 100644 index 0000000..4f79f1c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -0,0 +1,242 @@ +//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEINSTRUCTIONINFO_H +#define MBLAZEINSTRUCTIONINFO_H + +#include "MBlaze.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "MBlazeRegisterInfo.h" + +namespace llvm { + +namespace MBlaze { + + // MBlaze Branch Codes + enum FPBranchCode { + BRANCH_F, + BRANCH_T, + BRANCH_FL, + BRANCH_TL, + BRANCH_INVALID + }; + + // MBlaze Condition Codes + enum CondCode { + // To be used with float branch True + FCOND_F, + FCOND_UN, + FCOND_EQ, + FCOND_UEQ, + FCOND_OLT, + FCOND_ULT, + FCOND_OLE, + FCOND_ULE, + FCOND_SF, + FCOND_NGLE, + FCOND_SEQ, + FCOND_NGL, + FCOND_LT, + FCOND_NGE, + FCOND_LE, + FCOND_NGT, + + // To be used with float branch False + // This conditions have the same mnemonic as the + // above ones, but are used with a branch False; + FCOND_T, + FCOND_OR, + FCOND_NEQ, + FCOND_OGL, + FCOND_UGE, + FCOND_OGE, + FCOND_UGT, + FCOND_OGT, + FCOND_ST, + FCOND_GLE, + FCOND_SNE, + FCOND_GL, + FCOND_NLT, + FCOND_GE, + FCOND_NLE, + FCOND_GT, + + // Only integer conditions + COND_E, + COND_GZ, + COND_GEZ, + COND_LZ, + COND_LEZ, + COND_NE, + COND_INVALID + }; + + // Turn condition code into conditional branch opcode. + unsigned GetCondBranchFromCond(CondCode CC); + + /// GetOppositeBranchCondition - Return the inverse of the specified cond, + /// e.g. turning COND_E to COND_NE. + CondCode GetOppositeBranchCondition(MBlaze::CondCode CC); + + /// MBlazeCCToString - Map each FP condition code to its string + inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) + { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case FCOND_F: + case FCOND_T: return "f"; + case FCOND_UN: + case FCOND_OR: return "un"; + case FCOND_EQ: + case FCOND_NEQ: return "eq"; + case FCOND_UEQ: + case FCOND_OGL: return "ueq"; + case FCOND_OLT: + case FCOND_UGE: return "olt"; + case FCOND_ULT: + case FCOND_OGE: return "ult"; + case FCOND_OLE: + case FCOND_UGT: return "ole"; + case FCOND_ULE: + case FCOND_OGT: return "ule"; + case FCOND_SF: + case FCOND_ST: return "sf"; + case FCOND_NGLE: + case FCOND_GLE: return "ngle"; + case FCOND_SEQ: + case FCOND_SNE: return "seq"; + case FCOND_NGL: + case FCOND_GL: return "ngl"; + case FCOND_LT: + case FCOND_NLT: return "lt"; + case FCOND_NGE: + case FCOND_GE: return "ge"; + case FCOND_LE: + case FCOND_NLE: return "nle"; + case FCOND_NGT: + case FCOND_GT: return "gt"; + } + } +} + +/// MBlazeII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MBlazeII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // MBlaze Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HILO + + }; +} + +class MBlazeInstrInfo : public TargetInstrInfoImpl { + MBlazeTargetMachine &TM; + const MBlazeRegisterInfo RI; +public: + explicit MBlazeInstrInfo(MBlazeTargetMachine &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// Branch Analysis + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + /// Insert nop instruction when hazard condition is found + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// getGlobalBaseReg - Return a virtual register initialized with the + /// the global base register value. Output instructions required to + /// initialize the register in the function entry block, if necessary. + /// + unsigned getGlobalBaseReg(MachineFunction *MF) const; +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td new file mode 100644 index 0000000..3c406dd --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -0,0 +1,672 @@ +//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +include "MBlazeInstrFormats.td" + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// +def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// Call +def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, + [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>; + +// Return +def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, + [SDNPHasChain, SDNPOptInFlag]>; + +// Hi and Lo nodes are used to handle global addresses. Used on +// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol +// static model. +def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>; +def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>; + +def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>; + +//===----------------------------------------------------------------------===// +// MBlaze Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; +def HasBarrel : Predicate<"Subtarget.hasBarrel()">; +def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; +def HasDiv : Predicate<"Subtarget.hasDiv()">; +def HasMul : Predicate<"Subtarget.hasMul()">; +def HasFSL : Predicate<"Subtarget.hasFSL()">; +def HasEFSL : Predicate<"Subtarget.hasEFSL()">; +def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; +def HasException : Predicate<"Subtarget.hasException()">; +def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">; +def HasFPU : Predicate<"Subtarget.hasFPU()">; +def HasESR : Predicate<"Subtarget.hasESR()">; +def HasPVR : Predicate<"Subtarget.hasPVR()">; +def HasMul64 : Predicate<"Subtarget.hasMul64()">; +def HasSqrt : Predicate<"Subtarget.hasSqrt()">; +def HasMMU : Predicate<"Subtarget.hasMMU()">; + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +// Instruction operand types +def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; +def simm16 : Operand<i32>; +def uimm5 : Operand<i32>; +def fimm : Operand<f32>; + +// Unsigned Operand +def uimm16 : Operand<i32> { + let PrintMethod = "printUnsignedImm"; +} + +// FSL Operand +def fslimm : Operand<i32> { + let PrintMethod = "printFSLImm"; +} + +// Address operand +def memri : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops simm16, CPURegs); +} + +def memrr : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPURegs, CPURegs); +} + +// Transformation Function - get the lower 16 bits. +def LO16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF); +}]>; + +// Transformation Function - get the higher 16 bits. +def HI16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}]>; + +// Node immediate fits as 16-bit zero extended on target immediate. +// The LO16 param means that only the lower 16 bits of the node +// immediate are caught. +// e.g. addiu, sltiu +def immZExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}], LO16>; + +// FSL immediate field must fit in 4 bits. +def immZExt4 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ; +}]>; + +// shamt field must fit in 5 bits. +def immZExt5 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ; +}]>; + +// MBlaze Address Mode! SDNode frameindex could possibily be a match +// since load and store instructions from stack used it. +def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>; +def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +// As stack alignment is always done with addiu, we need a 16-bit immediate +let Defs = [R1], Uses = [R1] in { +def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt), + "${:comment} ADJCALLSTACKDOWN $amt", + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : MBlazePseudo<(outs), + (ins uimm16:$amt1, simm16:$amt2), + "${:comment} ADJCALLSTACKUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +// Some assembly macros need to avoid pseudoinstructions and assembler +// automatic reodering, we should reorder ourselves. +def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>; +def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>; +def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>; +def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>; + +// When handling PIC code the assembler needs .cpload and .cprestore +// directives. If the real instructions corresponding these directives +// are used, we have the same behavior, but get also a bunch of warnings +// from the assembler. +def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>; +def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions +//===----------------------------------------------------------------------===// +class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; + +class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithRI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>; + +class ArithN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +class ArithRN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +//===----------------------------------------------------------------------===// +// Misc Arithmetic Instructions +//===----------------------------------------------------------------------===// + +class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; + +class LogicI<bits<6> op, string instr_asm, SDNode OpNode> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], + IIAlu>; + +class EffectiveAddress<string instr_asm> : + TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr), + instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>; + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs CPURegs:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>; + +class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins CPURegs:$dst, memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>; + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// +class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBR<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " $target"), + [(brind CPURegs:$target)], IIBranch>; + +class BranchI<bits<6> op, bits<5> brf, string instr_asm> : + TBRI<op, brf, (outs), (ins brtarget:$target), + !strconcat(instr_asm, " $target"), + [(br bb:$target)], IIBranch>; + +//===----------------------------------------------------------------------===// +// Branch and Link Instructions +//===----------------------------------------------------------------------===// +class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBRL<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +class BranchLI<bits<6> op, bits<5> br, string instr_asm> : + TBRLI<op, br, (outs), (ins calltarget:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// Conditional Branch Instructions +//===----------------------------------------------------------------------===// +class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm, + PatFrag cond_op> : + TBRC<op, br, flags, (outs), + (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), + !strconcat(instr_asm, " $a, $b, $offset"), + [], IIBranch>; + //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], + //IIBranch>; + +class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> : + TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset), + !strconcat(instr_asm, " $a, $offset"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// MBlaze arithmetic instructions +//===----------------------------------------------------------------------===// + +let isCommutable = 1, isAsCheapAsAMove = 1 in { + def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>; + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>; + def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; + def AND : Logic<0x21, 0x000, "and ", and>; + def OR : Logic<0x20, 0x000, "or ", or>; + def XOR : Logic<0x22, 0x000, "xor ", xor>; +} + +let isAsCheapAsAMove = 1 in { + def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; + def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>; + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>; + def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul] in { + def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul,HasMul64] in { + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; +} + +let Predicates=[HasMul,HasMul64] in { + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; +} + +let Predicates=[HasBarrel] in { + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>; + def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>; + def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>; +} + +let Predicates=[HasDiv] in { + def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>; + def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>; +} + +//===----------------------------------------------------------------------===// +// MBlaze immediate mode arithmetic instructions +//===----------------------------------------------------------------------===// + +let isAsCheapAsAMove = 1 in { + def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>; + def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>; + def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>; + def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>; + def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>; + def RSUBIC : ArithRNI<0x0B, "rsubi ", simm16, immSExt16>; + def RSUBIK : ArithRNI<0x0E, "rsubic ", simm16, immSExt16>; + def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>; + def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>; + def ANDI : LogicI<0x29, "andi ", and>; + def ORI : LogicI<0x28, "ori ", or>; + def XORI : LogicI<0x2A, "xori ", xor>; +} + +let Predicates=[HasMul] in { + def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>; +} + +//===----------------------------------------------------------------------===// +// MBlaze memory access instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def LBU : LoadM<0x30, "lbu ", zextloadi8>; + def LHU : LoadM<0x31, "lhu ", zextloadi16>; + def LW : LoadM<0x32, "lw ", load>; + + def LBUI : LoadMI<0x30, "lbui ", zextloadi8>; + def LHUI : LoadMI<0x31, "lhui ", zextloadi16>; + def LWI : LoadMI<0x32, "lwi ", load>; +} + + def SB : StoreM<0x34, "sb ", truncstorei8>; + def SH : StoreM<0x35, "sh ", truncstorei16>; + def SW : StoreM<0x36, "sw ", store>; + + def SBI : StoreMI<0x34, "sbi ", truncstorei8>; + def SHI : StoreMI<0x35, "shi ", truncstorei16>; + def SWI : StoreMI<0x36, "swi ", store>; + +//===----------------------------------------------------------------------===// +// MBlaze branch instructions +//===----------------------------------------------------------------------===// + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BRI : BranchI<0x2E, 0x00, "bri ">; + def BRAI : BranchI<0x2E, 0x08, "brai ">; + def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>; + def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>; + def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>; + def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>; + def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>; + def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BR : Branch<0x26, 0x00, 0x000, "br ">; + def BRA : Branch<0x26, 0x08, 0x000, "bra ">; + def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>; + def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>; + def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>; + def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>; + def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>; + def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>; +} + +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRID : BranchI<0x2E, 0x10, "brid ">; + def BRAID : BranchI<0x2E, 0x18, "braid ">; + def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>; + def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>; + def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>; + def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>; + def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>; + def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, + hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRD : Branch<0x26, 0x10, 0x000, "brd ">; + def BRAD : Branch<0x26, 0x18, 0x000, "brad ">; + def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>; + def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>; + def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>; + def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>; + def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>; + def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>; +} + +let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRL : BranchL<0x26, 0x04, 0x000, "brl ">; + def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLID : BranchLI<0x2E, 0x14, "brlid ">; + def BRALID : BranchLI<0x2E, 0x1C, "bralid ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">; + def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">; +} + +let isReturn=1, isTerminator=1, hasDelaySlot=1, + isBarrier=1, hasCtrlDep=1, imm16=0x8 in { + def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target), + "rtsd $target, 8", + [(MBlazeRet CPURegs:$target)], + IIBranch>; +} + +//===----------------------------------------------------------------------===// +// MBlaze misc instructions +//===----------------------------------------------------------------------===// + +let addr = 0 in { + def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>; +} + +let usesCustomInserter = 1 in { + //class PseudoSelCC<RegisterClass RC, string asmstr>: + // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr, + // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>; + //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">; + + def Select_CC : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_CC PSEUDO!", + []>; + + def ShiftL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftL PSEUDO!", + []>; + + def ShiftRA : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRA PSEUDO!", + []>; + + def ShiftRL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRL PSEUDO!", + []>; +} + + +let rb = 0 in { + def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext16 $dst, $src", [], IIAlu>; + def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext8 $dst, $src", [], IIAlu>; + def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src), + "srl $dst, $src", [], IIAlu>; + def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src), + "sra $dst, $src", [], IIAlu>; + def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src), + "src $dst, $src", [], IIAlu>; +} + +def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +// Small immediates +def : Pat<(i32 0), (ADD R0, R0)>; +def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>; +def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>; + +// Arbitrary immediates +def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>; + +// In register sign extension +def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>; +def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>; + +// Call +def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>; +def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>; +def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>; + +// Shift Instructions +def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>; +def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>; +def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>; + +// SET_CC operations +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// SELECT operations +def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F), + (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>; + +// SELECT_CC +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// BRCOND instructions +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T), + (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T), + (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T), + (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T), + (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T), + (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T), + (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T), + (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T), + (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T), + (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T), + (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond CPURegs:$C, bb:$T), + (BNEID CPURegs:$C, bb:$T)>; + +// Jump tables, global addresses, and constant pools +def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>; +def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>; +def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>; + +// Misc instructions +def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>; + +// Arithmetic with immediates +def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>; +def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>; +def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>; + +// extended load and stores +def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>; + +def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>; +def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>; + +// peepholes +def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>; + +//===----------------------------------------------------------------------===// +// Floating Point Support +//===----------------------------------------------------------------------===// +include "MBlazeInstrFSL.td" +include "MBlazeInstrFPU.td" diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp new file mode 100644 index 0000000..c8faffc --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -0,0 +1,109 @@ +//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeIntrinsicInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> + +using namespace llvm; + +namespace mblazeIntrinsic { + + enum ID { + last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1, +#define GET_INTRINSIC_ENUM_VALUES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_mblaze_intrinsics + }; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "MBlazeGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +} + +std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, + unsigned numTys) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + if (IntrID < Intrinsic::num_intrinsics) + return 0; + assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && + "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned MBlazeIntrinsicInfo:: +lookupName(const char *Name, unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "MBlazeGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return 0; +} + +unsigned MBlazeIntrinsicInfo:: +lookupGCCName(const char *Name) const { + return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name); +} + +bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { + // Overload Table + const bool OTable[] = { +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE + }; + if (IntrID == 0) + return false; + else + return OTable[IntrID - Intrinsic::num_intrinsics]; +} + +/// This defines the "getAttributes(ID id)" method. +#define GET_INTRINSIC_ATTRIBUTES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ATTRIBUTES + +static const FunctionType *getType(LLVMContext &Context, unsigned id) { + const Type *ResultTy = NULL; + std::vector<const Type*> ArgTys; + bool IsVarArg = false; + +#define GET_INTRINSIC_GENERATOR +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_GENERATOR + + return FunctionType::get(ResultTy, ArgTys, IsVarArg); +} + +Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + const Type **Tys, + unsigned numTy) const { + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); + return cast<Function>(M->getOrInsertFunction(getName(IntrID), + getType(M->getContext(), IntrID), + AList)); +} diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h new file mode 100644 index 0000000..9804c77 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -0,0 +1,33 @@ +//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. +// +//===----------------------------------------------------------------------===// +#ifndef MBLAZEINTRINSICS_H +#define MBLAZEINTRINSICS_H + +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + + class MBlazeIntrinsicInfo : public TargetIntrinsicInfo { + public: + std::string getName(unsigned IntrID, const Type **Tys = 0, + unsigned numTys = 0) const; + unsigned lookupName(const char *Name, unsigned Len) const; + unsigned lookupGCCName(const char *Name) const; + bool isOverloaded(unsigned IID) const; + Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + unsigned numTys = 0) const; + }; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td new file mode 100644 index 0000000..76eb563 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -0,0 +1,137 @@ +//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the MicroBlaze-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Definitions for all MBlaze intrinsics. +// + +// MBlaze intrinsic classes. +let TargetPrefix = "mblaze", isTarget = 1 in { + class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty], + [IntrWriteMem]>; +} + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Get Intrinsic Definitions. +// + +def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">, + MBFSL_Get_Intrinsic; + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Put Intrinsic Definitions. +// + +def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">, + MBFSL_PutT_Intrinsic; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp new file mode 100644 index 0000000..7ae465d --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -0,0 +1,27 @@ +//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MBlazeMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeMCAsmInfo.h" +using namespace llvm; + +MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) { + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; + PrivateGlobalPrefix = "$"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + GPRel32Directive = "\t.gpword\t"; + HasSetDirective = false; +} diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h new file mode 100644 index 0000000..bccb418 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -0,0 +1,30 @@ +//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MBlazeMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZETARGETASMINFO_H +#define MBLAZETARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + class MBlazeMCAsmInfo : public MCAsmInfo { + public: + explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h new file mode 100644 index 0000000..08d4dca --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -0,0 +1,136 @@ +//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H +#define MBLAZE_MACHINE_FUNCTION_INFO_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +namespace llvm { + +/// MBlazeFunctionInfo - This class is derived from MachineFunction private +/// MBlaze target-specific information for each MachineFunction. +class MBlazeFunctionInfo : public MachineFunctionInfo { + +private: + /// Holds for each function where on the stack the Frame Pointer must be + /// saved. This is used on Prologue and Epilogue to emit FP save/restore + int FPStackOffset; + + /// Holds for each function where on the stack the Return Address must be + /// saved. This is used on Prologue and Epilogue to emit RA save/restore + int RAStackOffset; + + /// At each function entry a special bitmask directive must be emitted + /// to help in debugging CPU callee saved registers. It needs a negative + /// offset from the final stack size and its higher register location on + /// the stack. + int CPUTopSavedRegOff; + + /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset + struct MBlazeFIHolder { + + int FI; + int SPOffset; + + MBlazeFIHolder(int FrameIndex, int StackPointerOffset) + : FI(FrameIndex), SPOffset(StackPointerOffset) {} + }; + + /// When PIC is used the GP must be saved on the stack on the function + /// prologue and must be reloaded from this stack location after every + /// call. A reference to its stack location and frame index must be kept + /// to be used on emitPrologue and processFunctionBeforeFrameFinalized. + MBlazeFIHolder GPHolder; + + /// On LowerFormalArguments the stack size is unknown, so the Stack + /// Pointer Offset calculation of "not in register arguments" must be + /// postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 16> FnLoadArgs; + bool HasLoadArgs; + + // When VarArgs, we must write registers back to caller stack, preserving + // on register arguments. Since the stack size is unknown on + // LowerFormalArguments, the Stack Pointer Offset calculation must be + // postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs; + bool HasStoreVarArgs; + + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. + unsigned GlobalBaseReg; + +public: + MBlazeFunctionInfo(MachineFunction& MF) + : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), + GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), + SRetReturnReg(0), GlobalBaseReg(0) + {} + + int getFPStackOffset() const { return FPStackOffset; } + void setFPStackOffset(int Off) { FPStackOffset = Off; } + + int getRAStackOffset() const { return RAStackOffset; } + void setRAStackOffset(int Off) { RAStackOffset = Off; } + + int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; } + void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; } + + int getGPStackOffset() const { return GPHolder.SPOffset; } + int getGPFI() const { return GPHolder.FI; } + void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; } + void setGPFI(int FI) { GPHolder.FI = FI; } + bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } + + bool hasLoadArgs() const { return HasLoadArgs; } + bool hasStoreVarArgs() const { return HasStoreVarArgs; } + + void recordLoadArgsFI(int FI, int SPOffset) { + if (!HasLoadArgs) HasLoadArgs=true; + FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + void recordStoreVarArgsFI(int FI, int SPOffset) { + if (!HasStoreVarArgs) HasStoreVarArgs=true; + FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + + void adjustLoadArgsFI(MachineFrameInfo *MFI) const { + if (!hasLoadArgs()) return; + for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset ); + } + void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const { + if (!hasStoreVarArgs()) return; + for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset ); + } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } +}; + +} // end of namespace llvm + +#endif // MBLAZE_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp new file mode 100644 index 0000000..8dfca81 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -0,0 +1,421 @@ +//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-reg-info" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MBlazeRegisterInfo:: +MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) + : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), + Subtarget(ST), TII(tii) {} + +/// getRegisterNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + switch (RegEnum) { + case MBlaze::R0 : case MBlaze::F0 : return 0; + case MBlaze::R1 : case MBlaze::F1 : return 1; + case MBlaze::R2 : case MBlaze::F2 : return 2; + case MBlaze::R3 : case MBlaze::F3 : return 3; + case MBlaze::R4 : case MBlaze::F4 : return 4; + case MBlaze::R5 : case MBlaze::F5 : return 5; + case MBlaze::R6 : case MBlaze::F6 : return 6; + case MBlaze::R7 : case MBlaze::F7 : return 7; + case MBlaze::R8 : case MBlaze::F8 : return 8; + case MBlaze::R9 : case MBlaze::F9 : return 9; + case MBlaze::R10 : case MBlaze::F10 : return 10; + case MBlaze::R11 : case MBlaze::F11 : return 11; + case MBlaze::R12 : case MBlaze::F12 : return 12; + case MBlaze::R13 : case MBlaze::F13 : return 13; + case MBlaze::R14 : case MBlaze::F14 : return 14; + case MBlaze::R15 : case MBlaze::F15 : return 15; + case MBlaze::R16 : case MBlaze::F16 : return 16; + case MBlaze::R17 : case MBlaze::F17 : return 17; + case MBlaze::R18 : case MBlaze::F18 : return 18; + case MBlaze::R19 : case MBlaze::F19 : return 19; + case MBlaze::R20 : case MBlaze::F20 : return 20; + case MBlaze::R21 : case MBlaze::F21 : return 21; + case MBlaze::R22 : case MBlaze::F22 : return 22; + case MBlaze::R23 : case MBlaze::F23 : return 23; + case MBlaze::R24 : case MBlaze::F24 : return 24; + case MBlaze::R25 : case MBlaze::F25 : return 25; + case MBlaze::R26 : case MBlaze::F26 : return 26; + case MBlaze::R27 : case MBlaze::F27 : return 27; + case MBlaze::R28 : case MBlaze::F28 : return 28; + case MBlaze::R29 : case MBlaze::F29 : return 29; + case MBlaze::R30 : case MBlaze::F30 : return 30; + case MBlaze::R31 : case MBlaze::F31 : return 31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +/// getRegisterFromNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0 : return MBlaze::R0; + case 1 : return MBlaze::R1; + case 2 : return MBlaze::R2; + case 3 : return MBlaze::R3; + case 4 : return MBlaze::R4; + case 5 : return MBlaze::R5; + case 6 : return MBlaze::R6; + case 7 : return MBlaze::R7; + case 8 : return MBlaze::R8; + case 9 : return MBlaze::R9; + case 10 : return MBlaze::R10; + case 11 : return MBlaze::R11; + case 12 : return MBlaze::R12; + case 13 : return MBlaze::R13; + case 14 : return MBlaze::R14; + case 15 : return MBlaze::R15; + case 16 : return MBlaze::R16; + case 17 : return MBlaze::R17; + case 18 : return MBlaze::R18; + case 19 : return MBlaze::R19; + case 20 : return MBlaze::R20; + case 21 : return MBlaze::R21; + case 22 : return MBlaze::R22; + case 23 : return MBlaze::R23; + case 24 : return MBlaze::R24; + case 25 : return MBlaze::R25; + case 26 : return MBlaze::R26; + case 27 : return MBlaze::R27; + case 28 : return MBlaze::R28; + case 29 : return MBlaze::R29; + case 30 : return MBlaze::R30; + case 31 : return MBlaze::R31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +unsigned MBlazeRegisterInfo::getPICCallReg() { + return MBlaze::R20; +} + +//===----------------------------------------------------------------------===// +// Callee Saved Registers methods +//===----------------------------------------------------------------------===// + +/// MBlaze Callee Saved Registers +const unsigned* MBlazeRegisterInfo:: +getCalleeSavedRegs(const MachineFunction *MF) const { + // MBlaze callee-save register range is R20 - R31 + static const unsigned CalleeSavedRegs[] = { + MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23, + MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27, + MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31, + 0 + }; + + return CalleeSavedRegs; +} + +/// MBlaze Callee Saved Register Classes +const TargetRegisterClass* const* MBlazeRegisterInfo:: +getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRC[] = { + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + 0 + }; + + return CalleeSavedRC; +} + +BitVector MBlazeRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(MBlaze::R0); + Reserved.set(MBlaze::R1); + Reserved.set(MBlaze::R2); + Reserved.set(MBlaze::R13); + Reserved.set(MBlaze::R14); + Reserved.set(MBlaze::R15); + Reserved.set(MBlaze::R16); + Reserved.set(MBlaze::R17); + Reserved.set(MBlaze::R18); + Reserved.set(MBlaze::R19); + return Reserved; +} + +//===----------------------------------------------------------------------===// +// +// Stack Frame Processing methods +// +----------------------------+ +// +// The stack is allocated decrementing the stack pointer on +// the first instruction of a function prologue. Once decremented, +// all stack references are are done through a positive offset +// from the stack/frame pointer, so the stack is considered +// to grow up. +// +//===----------------------------------------------------------------------===// + +void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + + // See the description at MicroBlazeMachineFunction.h + int TopCPUSavedRegOff = -1; + + // Adjust CPU Callee Saved Registers Area. Registers RA and FP must + // be saved in this CPU Area there is the need. This whole Area must + // be aligned to the default Stack Alignment requirements. + unsigned StackOffset = MFI->getStackSize(); + unsigned RegSize = 4; + + // Replace the dummy '0' SPOffset by the negative offsets, as explained on + // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid + // the approach done by calculateFrameObjectOffsets to the stack frame. + MBlazeFI->adjustLoadArgsFI(MFI); + MBlazeFI->adjustStoreVarArgsFI(MFI); + + if (hasFP(MF)) { + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + MBlazeFI->setFPStackOffset(StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += RegSize; + } + + if (MFI->hasCalls()) { + MBlazeFI->setRAStackOffset(0); + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += RegSize; + } + + // Update frame info + MFI->setStackSize(StackOffset); + + // Recalculate the final tops offset. The final values must be '0' + // if there isn't a callee saved register for CPU or FPU, otherwise + // a negative offset is needed. + if (TopCPUSavedRegOff >= 0) + MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MBlazeRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +// FrameIndex represent objects inside a abstract stack. +// We must replace FrameIndex with an stack/frame pointer +// direct reference. +unsigned MBlazeRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value, RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + + unsigned i = 0; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + unsigned oi = i == 2 ? 1 : 2; + + DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + errs() << "<--------->\n" << MI); + + int FrameIndex = MI.getOperand(i).getIndex(); + int stackSize = MF.getFrameInfo()->getStackSize(); + int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + + DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n"); + + // as explained on LowerFormalArguments, detect negative offsets + // and adjust SPOffsets considering the final stack size. + int Offset = (spOffset < 0) ? (stackSize - spOffset) : (spOffset + 4); + Offset += MI.getOperand(oi).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(oi).ChangeToImmediate(Offset); + MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + return 0; +} + +void MBlazeRegisterInfo:: +emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Get the right frame order for MBlaze. + adjustMBlazeStackFrame(MF); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // Adjust stack : addi R1, R1, -imm + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(-StackSize); + + // Save the return address only if the function isnt a leaf one. + // swi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); + } + + // if framepointer enabled, save it and set it + // to point to the stack pointer + if (hasFP(MF)) { + // swi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1); + + // add R19, R1, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19) + .addReg(MBlaze::R1).addReg(MBlaze::R0); + } +} + +void MBlazeRegisterInfo:: +emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = MBBI->getDebugLoc(); + + // Get the FI's where RA and FP are saved. + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // if framepointer enabled, restore it and restore the + // stack pointer + if (hasFP(MF)) { + // add R1, R19, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1) + .addReg(MBlaze::R19).addReg(MBlaze::R0); + + // lwi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19) + .addImm(FPOffset).addReg(MBlaze::R1); + } + + // Restore the return address only if the function isnt a leaf one. + // lwi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) + .addImm(RAOffset).addReg(MBlaze::R1); + } + + // Get the number of bytes from FrameInfo + int StackSize = (int) MFI->getStackSize(); + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + // adjust stack. + // addi R1, R1, imm + if (StackSize) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(StackSize); + } +} + + +void MBlazeRegisterInfo:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + // Set the stack offset where GP must be saved/loaded from. + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + if (MBlazeFI->needGPSaveRestore()) + MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset()); +} + +unsigned MBlazeRegisterInfo::getRARegister() const { + return MBlaze::R15; +} + +unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return hasFP(MF) ? MBlaze::R19 : MBlaze::R1; +} + +unsigned MBlazeRegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + llvm_unreachable("What is the dwarf register number"); + return -1; +} + +#include "MBlazeGenRegisterInfo.inc" + diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h new file mode 100644 index 0000000..cde7d39 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -0,0 +1,96 @@ +//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEREGISTERINFO_H +#define MBLAZEREGISTERINFO_H + +#include "MBlaze.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "MBlazeGenRegisterInfo.h.inc" + +namespace llvm { +class MBlazeSubtarget; +class TargetInstrInfo; +class Type; + +namespace MBlaze { + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// MBlazeRegisterInfo.td file. + enum SubregIndex { + SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 + }; +} + +struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { + const MBlazeSubtarget &Subtarget; + const TargetInstrInfo &TII; + + MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, + const TargetInstrInfo &tii); + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// MBlaze::RA, return the number that it corresponds to (e.g. 31). + static unsigned getRegisterNumbering(unsigned RegEnum); + static unsigned getRegisterFromNumbering(unsigned RegEnum); + + /// Get PIC indirect call register + static unsigned getPICCallReg(); + + /// Adjust the MBlaze stack frame. + void adjustMBlazeStackFrame(MachineFunction &MF) const; + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + /// Stack Frame Processing Methods + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + + /// Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td new file mode 100644 index 0000000..96a5c98 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -0,0 +1,186 @@ +//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the MicroBlaze register file +//===----------------------------------------------------------------------===// + +// We have banks of 32 registers each. +class MBlazeReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +class MBlazeRegWithSubRegs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +// MBlaze CPU Registers +class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { + let Num = num; +} + +// MBlaze 32-bit (aliased) FPU Registers +class FPR<bits<5> num, string n, list<Register> subregs> + : MBlazeRegWithSubRegs<n, subregs> { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "MBlaze" in { + + // General Purpose Registers + def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>; + def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>; + def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>; + def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>; + def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[5]>; + def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>; + def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>; + def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>; + def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>; + def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>; + def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>; + def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>; + def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>; + def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>; + def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>; + def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>; + def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>; + def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>; + def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>; + def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>; + def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>; + def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>; + def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>; + def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>; + def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>; + def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>; + def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>; + def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>; + def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>; + def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>; + def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>; + def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>; + + /// MBlaze Single point precision FPU Registers + def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>; + def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>; + def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>; + def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>; + def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>; + def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>; + def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>; + def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>; + def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>; + def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>; + def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>; + def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>; + def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>; + def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>; + def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>; + def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>; + def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>; + def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>; + def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>; + def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>; + def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>; + def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>; + def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>; + def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>; + def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>; + def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>; + def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>; + def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>; + def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>; + def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>; + def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>; + def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"MBlaze", [i32], 32, + [ + // Return Values and Arguments + R3, R4, R5, R6, R7, R8, R9, R10, + + // Not preserved across procedure calls + R11, R12, + + // Callee save + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + + // Reserved + R0, // Always zero + R1, // The stack pointer + R2, // Read-only small data area anchor + R13, // Read-write small data area anchor + R14, // Return address for interrupts + R15, // Return address for sub-routines + R16, // Return address for trap + R17, // Return address for exceptions + R18, // Reserved for assembler + R19 // The frame-pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CPURegsClass::iterator + CPURegsClass::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} + +def FGR32 : RegisterClass<"MBlaze", [f32], 32, + [ + // Return Values and Arguments + F3, F4, F5, F6, F7, F8, F9, F10, + + // Not preserved across procedure calls + F11, F12, + + // Callee save + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31, + + // Reserved + F0, // Always zero + F1, // The stack pointer + F2, // Read-only small data area anchor + F13, // Read-write small data area anchor + F14, // Return address for interrupts + F15, // Return address for sub-routines + F16, // Return address for trap + F17, // Return address for exceptions + F18, // Reserved for assembler + F19 // The frame pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FGR32Class::iterator + FGR32Class::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td new file mode 100644 index 0000000..6a94491 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -0,0 +1,63 @@ +//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files. +//===----------------------------------------------------------------------===// +def ALU : FuncUnit; +def IMULDIV : FuncUnit; + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for MBlaze +//===----------------------------------------------------------------------===// +def IIAlu : InstrItinClass; +def IILoad : InstrItinClass; +def IIStore : InstrItinClass; +def IIXfer : InstrItinClass; +def IIBranch : InstrItinClass; +def IIHiLo : InstrItinClass; +def IIImul : InstrItinClass; +def IIIdiv : InstrItinClass; +def IIFcvt : InstrItinClass; +def IIFmove : InstrItinClass; +def IIFcmp : InstrItinClass; +def IIFadd : InstrItinClass; +def IIFmulSingle : InstrItinClass; +def IIFmulDouble : InstrItinClass; +def IIFdivSingle : InstrItinClass; +def IIFdivDouble : InstrItinClass; +def IIFsqrtSingle : InstrItinClass; +def IIFsqrtDouble : InstrItinClass; +def IIFrecipFsqrtStep : InstrItinClass; +def IIPseudo : InstrItinClass; + +//===----------------------------------------------------------------------===// +// MBlaze Generic instruction itineraries. +//===----------------------------------------------------------------------===// +def MBlazeGenericItineraries : ProcessorItineraries<[ + InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, + InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, + InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, + InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>, + InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>, + InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>, + InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>, + InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>, + InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>, + InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>, + InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>, + InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>, + InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>, + InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>, + InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>, + InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>, + InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>, + InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>, + InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]> +]>; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp new file mode 100644 index 0000000..3440521 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -0,0 +1,31 @@ +//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlaze specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeSubtarget.h" +#include "MBlaze.h" +#include "MBlazeGenSubtarget.inc" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): + HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false), + HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false), + HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false), + HasMul64(false), HasSqrt(false), HasMMU(false) +{ + std::string CPU = "v400"; + MBlazeArchVersion = V400; + + // Parse features string. + ParseSubtargetFeatures(FS, CPU); +} diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h new file mode 100644 index 0000000..bebb3f7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -0,0 +1,79 @@ +//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESUBTARGET_H +#define MBLAZESUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetMachine.h" + +#include <string> + +namespace llvm { + +class MBlazeSubtarget : public TargetSubtarget { + +protected: + + enum MBlazeArchEnum { + V400, V500, V600, V700, V710 + }; + + // MBlaze architecture version + MBlazeArchEnum MBlazeArchVersion; + + bool HasPipe3; + bool HasBarrel; + bool HasDiv; + bool HasMul; + bool HasFSL; + bool HasEFSL; + bool HasMSRSet; + bool HasException; + bool HasPatCmp; + bool HasFPU; + bool HasESR; + bool HasPVR; + bool HasMul64; + bool HasSqrt; + bool HasMMU; + + InstrItineraryData InstrItins; + +public: + + /// This constructor initializes the data members to match that + /// of the specified triple. + MBlazeSubtarget(const std::string &TT, const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasFPU() const { return HasFPU; } + bool hasSqrt() const { return HasSqrt; } + bool hasMul() const { return HasMul; } + bool hasMul64() const { return HasMul64; } + bool hasDiv() const { return HasDiv; } + bool hasBarrel() const { return HasBarrel; } + + bool isV400() const { return MBlazeArchVersion == V400; } + bool isV500() const { return MBlazeArchVersion == V500; } + bool isV600() const { return MBlazeArchVersion == V600; } + bool isV700() const { return MBlazeArchVersion == V700; } + bool isV710() const { return MBlazeArchVersion == V710; } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp new file mode 100644 index 0000000..9eba2b3 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -0,0 +1,66 @@ +//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about MBlaze target spec. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeMCAsmInfo.h" +#include "MBlazeTargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +extern "C" void LLVMInitializeMBlazeTarget() { + // Register the target. + RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); + RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget); +} + +// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment +// The stack is always 8 byte aligned +// On function prologue, the stack is created by decrementing +// its pointer. Once decremented, all references are done with positive +// offset from the stack/frame pointer, using StackGrowsUp enables +// an easier handling. +MBlazeTargetMachine:: +MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS): + LLVMTargetMachine(T, TT), + Subtarget(TT, FS), + DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-" + "f64:32:32-v64:32:32-v128:32:32-n32"), + InstrInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), + TLInfo(*this) { + if (getRelocationModel() == Reloc::Default) { + setRelocationModel(Reloc::Static); + } + + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); +} + +// Install an instruction selector pass using +// the ISelDag to gen MBlaze code. +bool MBlazeTargetMachine:: +addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeISelDag(*this)); + return false; +} + +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should +// print out the code after the passes. +bool MBlazeTargetMachine:: +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeDelaySlotFillerPass(*this)); + return true; +} diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h new file mode 100644 index 0000000..85c975c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -0,0 +1,69 @@ +//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_TARGETMACHINE_H +#define MBLAZE_TARGETMACHINE_H + +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeISelLowering.h" +#include "MBlazeIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" + +namespace llvm { + class formatted_raw_ostream; + + class MBlazeTargetMachine : public LLVMTargetMachine { + MBlazeSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MBlazeInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + MBlazeTargetLowering TLInfo; + MBlazeIntrinsicInfo IntrinsicInfo; + public: + MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); + + virtual const MBlazeInstrInfo *getInstrInfo() const + { return &InstrInfo; } + + virtual const TargetFrameInfo *getFrameInfo() const + { return &FrameInfo; } + + virtual const MBlazeSubtarget *getSubtargetImpl() const + { return &Subtarget; } + + virtual const TargetData *getTargetData() const + { return &DataLayout;} + + virtual const MBlazeRegisterInfo *getRegisterInfo() const + { return &InstrInfo.getRegisterInfo(); } + + virtual MBlazeTargetLowering *getTargetLowering() const + { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + + const TargetIntrinsicInfo *getIntrinsicInfo() const + { return &IntrinsicInfo; } + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + + virtual bool addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + }; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp new file mode 100644 index 0000000..79c9494 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -0,0 +1,88 @@ +//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +void MBlazeTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + SmallDataSection = + getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + SmallBSSSection = + getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + +} + +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section must be addressed using +// gp_rel operator. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= 8; +} + +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const { + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global address should be +/// placed into small data/bss section. +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + // We can only do this for datarel or BSS objects for now. + if (!Kind.isBSS() && !Kind.isDataRel()) + return false; + + // If this is a internal constant string, there is a special + // section for it, but not in small data/bss. + if (Kind.isMergeable1ByteCString()) + return false; + + const Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); +} + +const MCSection *MBlazeTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*" + // sections? + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h new file mode 100644 index 0000000..20e7702 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H +#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + + class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSection *SmallDataSection; + const MCSection *SmallBSSSection; + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile new file mode 100644 index 0000000..19e508c --- /dev/null +++ b/lib/Target/MBlaze/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMMBlazeCodeGen +TARGET = MBlaze + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ + MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ + MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ + MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \ + MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc + +DIRS = AsmPrinter TargetInfo + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..5afb14d --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMBlazeInfo + MBlazeTargetInfo.cpp + ) + +add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp new file mode 100644 index 0000000..16e01db --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMBlazeTarget; + +extern "C" void LLVMInitializeMBlazeTargetInfo() { + RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze"); +} diff --git a/lib/Target/MBlaze/TargetInfo/Makefile b/lib/Target/MBlaze/TargetInfo/Makefile new file mode 100644 index 0000000..fb7ea11 --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index a96ee49..ac41cc8 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -38,7 +38,8 @@ namespace llvm { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -57,7 +58,7 @@ bool MSILModule::runOnModule(Module &M) { TypeSymbolTable& Table = M.getTypeSymbolTable(); std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes(); for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second)) + if (!I->second->isStructTy() && !I->second->isOpaqueTy()) Table.remove(I++); else { std::set<const Type *>::iterator T = Types.find(I->second); @@ -187,7 +188,7 @@ void MSILWriter::printModuleStartup() { break; case 1: Arg1 = F->arg_begin(); - if (Arg1->getType()->isInteger()) { + if (Arg1->getType()->isIntegerTy()) { Out << "\tldloc\targc\n"; Args = getTypeName(Arg1->getType()); BadSig = false; @@ -195,7 +196,7 @@ void MSILWriter::printModuleStartup() { break; case 2: Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isInteger() && + if (Arg1->getType()->isIntegerTy() && Arg2->getType()->getTypeID() == Type::PointerTyID) { Out << "\tldloc\targc\n\tldloc\targv\n"; Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); @@ -207,7 +208,7 @@ void MSILWriter::printModuleStartup() { } bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { Out << "\tldc.i4.0\n"; } else { Out << "\tcall\t" << getTypeName(F->getReturnType()) << @@ -334,7 +335,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, bool isNested) { - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return getPrimitiveTypeName(Ty,isSigned); // FIXME: "OpaqueType" support switch (Ty->getTypeID()) { @@ -1459,7 +1460,7 @@ void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { for (std::set<const Type*>::const_iterator UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { const Type* Ty = *UI; - if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty)) + if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy()) Name = getTypeName(Ty, false, true); // Type with no need to declare. else continue; @@ -1688,7 +1689,8 @@ void MSILWriter::printExternals() { bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index def5fc6..7a35eb0 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -98,12 +98,19 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, bool isMemOp = Modifier && !strcmp(Modifier, "mem"); uint64_t Offset = MO.getOffset(); - O << (isMemOp ? '&' : '#'); + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) msp430-as will silently miscompile the output :( + if (!Modifier || strcmp(Modifier, "nohash")) + O << (isMemOp ? '&' : '#'); if (Offset) O << '(' << Offset << '+'; O << *GetGlobalValueSymbol(MO.getGlobal()); - + if (Offset) O << ')'; @@ -124,15 +131,11 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, const MachineOperand &Disp = MI->getOperand(OpNum+1); // Print displacement first - if (!Disp.isImm()) { - printOperand(MI, OpNum+1, "mem"); - } else { - if (!Base.getReg()) - O << '&'; - - printOperand(MI, OpNum+1, "nohash"); - } + // Imm here is in fact global address - print extra modifier. + if (Disp.isImm() && !Base.getReg()) + O << '&'; + printOperand(MI, OpNum+1, "nohash"); // Print register base field if (Base.getReg()) { diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index f6565bd..d7636e6 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -62,21 +62,26 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Disp = MI->getOperand(OpNo+1); // Print displacement first - if (Disp.isExpr()) { - O << '&' << *Disp.getExpr(); - } else { - assert(Disp.isImm() && "Expected immediate in displacement field"); - if (!Base.getReg()) - O << '&'; + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) msp430-as will silently miscompile the output :( + if (!Base.getReg()) + O << '&'; + + if (Disp.isExpr()) + O << *Disp.getExpr(); + else { + assert(Disp.isImm() && "Expected immediate in displacement field"); O << Disp.getImm(); } - // Print register base field - if (Base.getReg()) { + if (Base.getReg()) O << '(' << getRegisterName(Base.getReg()) << ')'; - } } void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo) { diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4eec757..911cfcb 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -26,26 +26,12 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" - using namespace llvm; -#ifndef NDEBUG -static cl::opt<bool> -ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden, - cl::desc("Pop up a window to show isel dags after RMW preprocess")); -#else -static const bool ViewRMWDAGs = false; -#endif - -STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); - - namespace { struct MSP430ISelAddressMode { enum { @@ -123,8 +109,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "MSP430 DAG->DAG Pattern Instruction Selection"; } @@ -133,9 +117,6 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); @@ -144,18 +125,12 @@ namespace { #include "MSP430GenDAGISel.inc" private: - DenseMap<SDNode*, SDNode*> RMWStores; - void PreprocessForRMW(); SDNode *Select(SDNode *N); SDNode *SelectIndexedLoad(SDNode *Op); SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -217,10 +192,7 @@ bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) } bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { - DEBUG({ - errs() << "MatchAddress: "; - AM.dump(); - }); + DEBUG(errs() << "MatchAddress: "; AM.dump()); switch (N.getOpcode()) { default: break; @@ -336,270 +308,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { - if (OptLevel == CodeGenOpt::None) return false; - - /// RMW preprocessing creates the following code: - /// [Load1] - /// ^ ^ - /// / | - /// / | - /// [Load2] | - /// ^ ^ | - /// | | | - /// | \-| - /// | | - /// | [Op] - /// | ^ - /// | | - /// \ / - /// \ / - /// [Store] - /// - /// The path Store => Load2 => Load1 is via chain. Note that in general it is - /// not allowed to fold Load1 into Op (and Store) since it will creates a - /// cycle. However, this is perfectly legal for the loads moved below the - /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created - /// during preprocessing) to determine whether it's legal to introduce such - /// "cycle" for a moment. - DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); - if (I != RMWStores.end() && I->second == N) - return true; - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. This a version which sinks two loads below token factor. -/// Look into PreprocessForRMW comments for explanation of transform. -static void MoveBelowTokenFactor2(SelectionDAG *CurDAG, - SDValue Load1, SDValue Load2, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) { - SDNode* N = TF.getOperand(i).getNode(); - if (Load2.getNode() == N) - Ops.push_back(Load2.getOperand(0)); - else if (Load1.getNode() != N) - Ops.push_back(TF.getOperand(i)); - } - - SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(), - TF.getOpcode(), - TF.getNode()->getVTList(), - &Ops[0], Ops.size()), TF.getResNo()); - SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF, - Load2.getOperand(1), - Load2.getOperand(2)); - - SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1), - Load1.getOperand(1), - Load1.getOperand(2)); - - CurDAG->UpdateNodeOperands(Store, - NewLoad1.getValue(1), - Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isAllowedToSink - return true if N a load which can be moved below token -/// factor. Basically, the load should be non-volatile and has single use. -static bool isLoadAllowedToSink(SDValue N, SDValue Chain) { - if (N.getOpcode() == ISD::BIT_CONVERT) - N = N.getOperand(0); - - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - return (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - LD->isOperandOf(Chain.getNode())); -} - - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. -/// The chain produced by the load must only be used by the store's chain -/// operand, otherwise this may produce a cycle in the DAG. -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (isLoadAllowedToSink(N, Chain) && - N.getOperand(1) == Address) { - Load = N; - return true; - } - return false; -} - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// We also recognize the case where second operand of Op is load as well and -/// move it below token factor as well creating DAG as follows: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load1] -/// ^ ^ -/// / | -/// / | -/// [Load2] | -/// ^ ^ | -/// | | | -/// | \-| -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// This allows selection of mem-mem instructions. Yay! - -void MSP430DAGToDAGISel::PreprocessForRMW() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - unsigned RModW = 0; - SDValue Load1, Load2; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } else if (isRMWLoad(N11, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N10, Chain)) { - Load2 = N10; - RModW = 2; - } else - RModW = 1; - } - break; - } - case ISD::SUB: - case ISD::SUBC: - case ISD::SUBE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } - break; - } - } - - NumLoadMoved += RModW; - if (RModW == 1) - MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain); - else if (RModW == 2) { - MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain); - SDNode* Store = I; - RMWStores[Store] = Load2.getNode(); - } - } -} - - static bool isValidIndexedLoad(const LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD) @@ -656,7 +364,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { + IsLegalToFold(N1, Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; @@ -682,46 +390,19 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MSP430DAGToDAGISel::InstructionSelect() { - std::string BlockName; - if (ViewRMWDAGs) - BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); - - PreprocessForRMW(); - - if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName); - - DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); - DEBUG(CurDAG->dump()); - - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); - RMWStores.clear(); -} - SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "); + DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent += 2); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - DEBUG(Indent -= 2); return NULL; } @@ -809,13 +490,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); return ResNode; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index ef81f51..e6c7e1e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -371,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -500,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, PseudoSourceValue::getStack(), - VA.getLocMemOffset())); + VA.getLocMemOffset(), false, false, 0)); } } @@ -794,18 +795,15 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { if (andCC) { // C = ~Z, thus Res = SRW & 1, no processing is required } else { - // Res = (SRW >> 1) & 1 + // Res = ~((SRW >> 1) & 1) Shift = true; + Invert = true; } break; case MSP430CC::COND_E: - if (andCC) { - // C = ~Z, thus Res = ~(SRW & 1) - } else { - // Res = ~((SRW >> 1) & 1) - Shift = true; - } - Invert = true; + Shift = true; + // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however, + // Res = (SRW >> 1) & 1 is 1 word shorter. break; } EVT VT = Op.getValueType(); @@ -893,13 +891,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -911,7 +909,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::FPW, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -971,7 +970,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits()); @@ -986,7 +985,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. - return 0 && Ty1->isInteger(8) && Ty2->isInteger(16); + return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index bb06f7b..144ba26 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -250,7 +250,7 @@ def MOV16ri : I16ri<0x0, [(set GR16:$dst, imm:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I8rm<0x0, (outs GR8:$dst), (ins memsrc:$src), "mov.b\t{$src, $dst}", diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index f1d4a67..c4746db 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -59,8 +59,6 @@ public: SelectionDAGISel(tm), TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - virtual void InstructionSelect(); - // Pass Name virtual const char *getPassName() const { return "MIPS DAG->DAG Pattern Instruction Selection"; @@ -98,29 +96,10 @@ private: inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } - - - #ifndef NDEBUG - unsigned Indent; - #endif }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MipsDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. @@ -329,17 +308,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; } @@ -547,14 +520,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index d94944f..584b887 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0); + DAG.getEntryNode(), GA, NULL, 0, + false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. if (!GV->hasLocalLinkage() || isa<Function>(GV)) @@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); @@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0); + CP, NULL, 0, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -601,7 +603,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); } //===----------------------------------------------------------------------===// @@ -859,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // emit ISD::STORE whichs stores the // parameter value to a stack Location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); } // Transform all store nodes into one single node because all store @@ -933,7 +937,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. FI = MipsFI->getGPFI(); SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + false, false, 0); Chain = GPLoad.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); @@ -1097,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1132,7 +1138,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); // Record the frame index of the first variable argument // which is a value necessary to VASTART. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e67bcbf..cef3697 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{ // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// // Instructions specific format @@ -300,9 +300,8 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm>: // Jump and Link (Call) let isCall=1, hasDelaySlot=1, // All calls clobber the non-callee saved registers... - Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, - K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, - F14, F15, F16, F17, F18, F19], Uses = [GP] in { + Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { class JumpLink<bits<6> op, string instr_asm>: FJ< op, (outs), @@ -593,8 +592,8 @@ def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), (JAL texternalsym:$dst)>; -def : Pat<(MipsJmpLink CPURegs:$dst), - (JALR CPURegs:$dst)>; +//def : Pat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 32e0436..237b160 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H #define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 72f7c16..44a6cc0 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -106,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { DbgInfo.BeginFunction(MF); // Now emit the instructions of function in its code section. - const MCSection *fCodeSection - = getObjFileLowering().SectionForCode(CurrentFnSym->getName()); + const MCSection *fCodeSection = + getObjFileLowering().SectionForCode(CurrentFnSym->getName(), + PAN::isISR(F->getSection())); // Start the Code Section. O << "\n"; @@ -157,6 +158,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // printOperand - print operand of insn. void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); + const Function *F = MI->getParent()->getParent()->getFunction(); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -189,19 +191,18 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } case MachineOperand::MO_ExternalSymbol: { const char *Sname = MO.getSymbolName(); + std::string Printname = Sname; - // If its a libcall name, record it to decls section. - if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) - LibcallDecls.push_back(Sname); - - // Record a call to intrinsic to print the extern declaration for it. - std::string Sym = Sname; - if (PAN::isMemIntrinsic(Sym)) { - Sym = PAN::addPrefix(Sym); - LibcallDecls.push_back(createESName(Sym)); + // Intrinsic stuff needs to be renamed if we are printing IL fn. + if (PAN::isIntrinsicStuff(Printname)) { + if (PAN::isISR(F->getSection())) { + Printname = PAN::Rename(Sname); + } + // Record these decls, we need to print them in asm as extern. + LibcallDecls.push_back(createESName(Printname)); } - O << Sym; + O << Printname; break; } case MachineOperand::MO_MachineBasicBlock: @@ -247,8 +248,6 @@ void PIC16AsmPrinter::printLibcallDecls() { for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); I != LibcallDecls.end(); I++) { O << MAI->getExternDirective() << *I << "\n"; - O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n"; - O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; } diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h index e18ddf1..4c1a8da 100644 --- a/lib/Target/PIC16/PIC16ABINames.h +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -178,18 +178,21 @@ namespace llvm { return Func1 + tag; } + // Get the retval label for the given function. static std::string getRetvalLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(RET_LABEL); return Func1 + tag; } + // Get the argument label for the given function. static std::string getArgsLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(ARGS_LABEL); return Func1 + tag; } + // Get the tempdata label for the given function. static std::string getTempdataLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(TEMPS_LABEL); @@ -263,6 +266,7 @@ namespace llvm { return false; } + inline static bool isMemIntrinsic (const std::string &Name) { if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || Name.compare("@memmove") == 0) { @@ -272,6 +276,41 @@ namespace llvm { return false; } + // Currently names of libcalls are assigned during TargetLowering + // object construction. There is no provision to change the when the + // code for a function IL function being generated. + // So we have to change these names while printing assembly. + // We need to do that mainly for names related to intrinsics. This + // function returns true if a name needs to be cloned. + inline static bool isIntrinsicStuff(const std::string &Name) { + // Return true if the name contains LIBCALL marker, or a MemIntrinisc. + // these are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself. + if ((Name.find(getTagName(LIBCALL)) != std::string::npos) + || isMemIntrinsic(Name)) + return true; + + return false; + } + + // Rename the name for IL. + inline static std::string Rename(const std::string &Name) { + std::string Newname; + // If its a label (LIBCALL+Func+LABEL), change it to + // (LIBCALL+Func+IL+LABEL). + TAGS id = getSymbolTag(Name); + if (id == ARGS_LABEL || id == RET_LABEL) { + std::size_t pos = Name.find(getTagName(id)); + Newname = Name.substr(0, pos) + ".IL" + getTagName(id); + return Newname; + } + + // Else, just append IL to name. + return Name + ".IL"; + } + + + + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { if (! isLocalName(Var)) return false; @@ -325,6 +364,35 @@ namespace llvm { return o.str(); } + + // Return true if the current function is an ISR + inline static bool isISR(const std::string SectName) { + if (SectName.find("interrupt") != std::string::npos) + return true; + + return false; + } + + // Return the address for ISR starts in rom. + inline static std::string getISRAddr(void) { + return "0x4"; + } + + // Returns the name of clone of a function. + static std::string getCloneFnName(const std::string &Func) { + return (Func + ".IL"); + } + + // Returns the name of clone of a variable. + static std::string getCloneVarName(const std::string &Fn, + const std::string &Var) { + std::string cloneVarName = Var; + // These vars are named like fun.auto.var. + // Just replace the function name, with clone function name. + std::string cloneFnName = getCloneFnName(Fn); + cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName); + return cloneVarName; + } }; // class PAN. } // end namespace llvm; diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index c517b1b..877e4ff 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -419,7 +419,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, if (TagName != "") O << ", " << TagName; for (int i = 0; i<Num; i++) - O << "," << Aux[i]; + O << "," << (Aux[i] && 0xff); } /// EmitSymbol - Emit .def for a symbol. Value is offset for the member. diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index 82197ae..6cbd002 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -14,10 +14,7 @@ #define DEBUG_TYPE "pic16-isel" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "PIC16ISelDAGToDAG.h" -#include "llvm/Support/Debug.h" - using namespace llvm; /// createPIC16ISelDag - This pass converts a legalized DAG into a @@ -27,13 +24,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PIC16DAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 813a540..8ed5bf7 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -19,6 +19,8 @@ #include "PIC16TargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/Intrinsics.h" using namespace llvm; @@ -46,8 +48,6 @@ public: return "PIC16 DAG->DAG Pattern Instruction Selection"; } - virtual void InstructionSelect(); - private: // Include the pieces autogenerated from the target description. #include "PIC16GenDAGISel.inc" diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 7754a4f..d17abb9 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -419,8 +419,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl, - DAG.GetOrdering(DAG.getEntryNode().getNode())); + Callee, Args, DAG, dl); return CallInfo.first; } @@ -622,12 +621,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { ChainHi = Chain.getOperand(1); } SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, - 0 + StoreOffset); + 0 + StoreOffset, false, false, 0); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, - 1 + StoreOffset); + 1 + StoreOffset, false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -1513,8 +1512,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(), - Op.getNode(), Op.getNode())) + if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) return false; else MemOp = 0; @@ -1528,10 +1526,24 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, return true; if (isDirectLoad(Op.getOperand(1))) { - if (Op.getOperand(1).hasOneUse()) - return false; - else - MemOp = 1; + if (Op.getOperand(1).hasOneUse()) { + // Legal and profitable folding check uses the NodeId of DAG nodes. + // This NodeId is assigned by topological order. Therefore first + // assign topological order then perform legal and profitable check. + // Note:- Though this ordering is done before begining with legalization, + // newly added node during legalization process have NodeId=-1 (NewNode) + // therefore before performing any check proper ordering of the node is + // required. + DAG.AssignTopologicalOrder(); + + // Direct load operands are folded in binary operations. But before folding + // verify if this folding is legal. Fold only if it is legal otherwise + // convert this direct load to a separate memory operation. + if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + return false; + else + MemOp = 1; + } } return true; } diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index cc71b04..ab81ed1 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -59,6 +59,7 @@ namespace { const TargetInstrInfo *TII; // Machine instruction info. MachineBasicBlock *MBB; // Current basic block std::string CurBank; + int PageChanged; }; char MemSelOpt::ID = 0; @@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Let us assume that when entering a basic block now bank is selected. // Ideally we should look at the predecessors for this information. CurBank=""; + PageChanged=0; - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineBasicBlock::iterator I; + for (I = BB.begin(); I != BB.end(); ++I) { Changed |= processInstruction(I); + + // if the page has changed insert a page sel before + // any instruction that needs one + if (PageChanged == 1) + { + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the goto terminator or the fall thru + // basic blcok. + // If the terminator is return, we don't need to restore since there + // is no goto or fall thru basic block. + if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto + (I->getOpcode() == PIC16::sublw_6) || //macro has goto + (I->getOpcode() == PIC16::addlwc) || //macro has goto + (TII->get(I->getOpcode()).isBranch())) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } } + + // The basic block is over, but if we did not find any goto yet, + // we haven't restored the page. + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by fall thru basic blcok. + // If the terminator is return, we don't need to restore since there + // is fall thru basic block. + if (PageChanged == 1) { + // save the end pointer before we move back to last insn. + MachineBasicBlock::iterator J = I; + I--; + const TargetInstrDesc &TID = TII->get(I->getOpcode()); + if (! TID.isReturn()) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, J, dl, + TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } + + return Changed; } @@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore())) return false; + // The first thing we should do is that record if banksel/pagesel are + // changed in an unknown way. This can happend via any type of call. + // We do it here first before scanning of MemOp / BBOp as the indirect + // call insns do not have any operands, but they still may change bank/page. + if (TID.isCall()) { + // Record that we have changed the page, so that we can restore it + // before basic block ends. + // We require to signal that a page anc bank change happened even for + // indirect calls. + PageChanged = 1; + + // When a call is made, there may be banksel for variables in callee. + // Hence the banksel in caller needs to be reset. + CurBank = ""; + } + // Scan for the memory address operand. // FIXME: Should we use standard interfaces like memoperands_iterator, // hasMemOperand() etc ? int MemOpPos = -1; + int BBOpPos = -1; for (unsigned i = 0; i < NumOperands; i++) { MachineOperand Op = MI->getOperand(i); if (Op.getType() == MachineOperand::MO_GlobalAddress || - Op.getType() == MachineOperand::MO_ExternalSymbol || - Op.getType() == MachineOperand::MO_MachineBasicBlock) { + Op.getType() == MachineOperand::MO_ExternalSymbol) { // We found one mem operand. Next one may be BS. MemOpPos = i; - break; + } + if (Op.getType() == MachineOperand::MO_MachineBasicBlock) { + // We found one BB operand. Next one may be pagesel. + BBOpPos = i; } } // If we did not find an insn accessing memory. Continue. - if (MemOpPos == -1) return Changed; + if ((MemOpPos == -1) && + (BBOpPos == -1)) + return false; + assert ((BBOpPos != MemOpPos) && "operand can only be of one type"); - // Get the MemOp. - MachineOperand &Op = MI->getOperand(MemOpPos); // If this is a pagesel material, handle it first. - if (MI->getOpcode() == PIC16::CALL || - MI->getOpcode() == PIC16::br_uncond) { + // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp. + // Pagesel is required only for a direct call. + if ((MI->getOpcode() == PIC16::CALL)) { + // Get the BBOp. + MachineOperand &MemOp = MI->getOperand(MemOpPos); DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)). - addOperand(Op); - return true; + BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp); + + // CALL and br_ucond needs only pagesel. so we are done. + return true; } + // Pagesel is handled. Now, add a Banksel if needed. + if (MemOpPos == -1) return Changed; + // Get the MemOp. + MachineOperand &Op = MI->getOperand(MemOpPos); + // Get the section name(NewBank) for MemOp. // This assumes that the section names for globals are already set by // AsmPrinter->doInitialization. std::string NewBank = CurBank; + bool hasExternalLinkage = false; if (Op.getType() == MachineOperand::MO_GlobalAddress && Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { + if (Op.getGlobal()->hasExternalLinkage()) + hasExternalLinkage= true; NewBank = Op.getGlobal()->getSection(); } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { // External Symbol is generated for temp data and arguments. They are @@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { // If the previous and new section names are same, we don't need to // emit banksel. - if (NewBank.compare(CurBank) != 0 ) { + if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { DebugLoc dl = MI->getDebugLoc(); BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). addOperand(Op); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp new file mode 100644 index 0000000..865da35 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -0,0 +1,299 @@ +//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to clone all functions that are shared between +// the main line code (ML) and interrupt line code (IL). It clones all such +// shared functions and their automatic global vars by adding the .IL suffix. +// +// This pass is supposed to be run on the linked .bc module. +// It traveses the module call graph twice. Once starting from the main function +// and marking each reached function as "ML". Again, starting from the ISR +// and cloning any reachable function that was marked as "ML". After cloning +// the function, it remaps all the call sites in IL functions to call the +// cloned functions. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "PIC16Cloner.h" +#include "../PIC16ABINames.h" +#include <vector> + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + char PIC16Cloner::ID = 0; + + ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); } +} + +// We currently intend to run these passes in opt, which does not have any +// diagnostic support. So use these functions for now. In future +// we will probably write our own driver tool. +// +void PIC16Cloner::reportError(string ErrorString) { + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + +void PIC16Cloner:: +reportError (string ErrorString, vector<string> &Values) { + unsigned ValCount = Values.size(); + string TargetString; + for (unsigned i=0; i<ValCount; ++i) { + TargetString = "%"; + TargetString += ((char)i + '0'); + ErrorString.replace(ErrorString.find(TargetString), TargetString.length(), + Values[i]); + } + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + + +// Entry point +// +bool PIC16Cloner::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + + // Search for the "main" and "ISR" functions. + CallGraphNode *mainCGN = NULL, *isrCGN = NULL; + for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++) + { + // External calling node doesn't have any function associated with it. + if (! it->first) + continue; + + if (it->first->getName().str() == "main") { + mainCGN = it->second; + } + + if (PAN::isISR(it->first->getSection())) { + isrCGN = it->second; + } + + // Don't search further if we've found both. + if (mainCGN && isrCGN) + break; + } + + // We have nothing to do if any of the main or ISR is missing. + if (! mainCGN || ! isrCGN) return false; + + // Time for some diagnostics. + // See if the main itself is interrupt function then report an error. + if (PAN::isISR(mainCGN->getFunction()->getSection())) { + reportError("Function 'main' can't be interrupt function"); + } + + + // Mark all reachable functions from main as ML. + markCallGraph(mainCGN, "ML"); + + // And then all the functions reachable from ISR will be cloned. + cloneSharedFunctions(isrCGN); + + return true; +} + +// Mark all reachable functions from the given node, with the given mark. +// +void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { + // Mark the top node first. + Function *thisF = CGN->getFunction(); + + thisF->setSection(StringMark); + + // Mark all the called functions + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (! CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + // Has already been mark + if (CalledF->getSection().find(StringMark) != string::npos) { + // Should we do anything here? + } else { + // Mark now + CalledF->setSection(StringMark); + } + + // Before going any further mark all the called function by current + // function. + markCallGraph(cgn_it->second ,StringMark); + } // end of loop of all called functions. +} + + +// For PIC16, automatic variables of a function are emitted as globals. +// Clone the auto variables of a function and put them in ValueMap, +// this ValueMap will be used while +// Cloning the code of function itself. +// +void PIC16Cloner::CloneAutos(Function *F) { + // We'll need to update module's globals list as well. So keep a reference + // handy. + Module *M = F->getParent(); + Module::GlobalListType &Globals = M->getGlobalList(); + + // Clear the leftovers in ValueMap by any previous cloning. + ValueMap.clear(); + + // Find the auto globls for this function and clone them, and put them + // in ValueMap. + std::string FnName = F->getName().str(); + std::string VarName, ClonedVarName; + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + VarName = I->getName().str(); + if (PAN::isLocalToFunc(FnName, VarName)) { + // Auto variable for current function found. Clone it. + GlobalVariable *GV = I; + + const Type *InitTy = GV->getInitializer()->getType(); + GlobalVariable *ClonedGV = + new GlobalVariable(InitTy, false, GV->getLinkage(), + GV->getInitializer()); + ClonedGV->setName(PAN::getCloneVarName(FnName, VarName)); + // Add these new globals to module's globals list. + Globals.push_back(ClonedGV); + + // Update ValueMap. + ValueMap[GV] = ClonedGV; + } + } +} + + +// Clone all functions that are reachable from ISR and are already +// marked as ML. +// +void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { + + // Check all the called functions from ISR. + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (!CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + if (CalledF->getSection().find("ML") != string::npos) { + // Function is alternatively marked. It should be a shared one. + // Create IL copy. Passing called function as first argument + // and the caller as the second argument. + + // Before making IL copy, first ensure that this function has a + // body. If the function does have a body. It can't be cloned. + // Such a case may occur when the function has been declarated + // in the C source code but its body exists in assembly file. + if (!CalledF->isDeclaration()) { + Function *cf = cloneFunction(CalledF); + remapAllSites(CGN->getFunction(), CalledF, cf); + } else { + // It is called only from ISR. Still mark it as we need this info + // in code gen while calling intrinsics.Function is not marked. + CalledF->setSection("IL"); + } + } + // Before going any further clone all the shared function reachaable + // by current function. + cloneSharedFunctions(cgn_it->second); + } // end of loop of all called functions. +} + +// Clone the given function and return it. +// Note: it uses the ValueMap member of the class, which is already populated +// by cloneAutos by the time we reach here. +// FIXME: Should we just pass ValueMap's ref as a parameter here? rather +// than keeping the ValueMap as a member. +Function * +PIC16Cloner::cloneFunction(Function *OrgF) { + Function *ClonedF; + + // See if we already cloned it. Return that. + cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF); + if(cm_it != ClonedFunctionMap.end()) { + ClonedF = cm_it->second; + return ClonedF; + } + + // Clone does not exist. + // First clone the autos, and populate ValueMap. + CloneAutos(OrgF); + + // Now create the clone. + ClonedF = CloneFunction(OrgF, ValueMap); + + // The new function should be for interrupt line. Therefore should have + // the name suffixed with IL and section attribute marked with IL. + ClonedF->setName(PAN::getCloneFnName(OrgF->getName())); + ClonedF->setSection("IL"); + + // Add the newly created function to the module. + OrgF->getParent()->getFunctionList().push_back(ClonedF); + + // Update the ClonedFunctionMap to record this cloning activity. + ClonedFunctionMap[OrgF] = ClonedF; + + return ClonedF; +} + + +// Remap the call sites of shared functions, that are in IL. +// Change the IL call site of a shared function to its clone. +// +void PIC16Cloner:: +remapAllSites(Function *Caller, Function *OrgF, Function *Clone) { + // First find the caller to update. If the caller itself is cloned + // then use the cloned caller. Otherwise use it. + cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller); + if (cm_it != ClonedFunctionMap.end()) + Caller = cm_it->second; + + // For the lack of a better call site finding mechanism, iterate over + // all insns to find the uses of original fn. + for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) { + BasicBlock &BB = *BI; + for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) { + if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF) + II->setOperand(0, Clone); + } + } +} + + + diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h new file mode 100644 index 0000000..24c1152 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -0,0 +1,83 @@ +//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains declaration of a cloner class clone all functions that +// are shared between the main line code (ML) and interrupt line code (IL). +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16CLONER_H +#define PIC16CLONER_H + +#include "llvm/ADT/DenseMap.h" + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + // forward classes. + class Value; + class Function; + class Module; + class ModulePass; + class CallGraph; + class CallGraphNode; + class AnalysisUsage; + + class PIC16Cloner : public ModulePass { + public: + static char ID; // Class identification + PIC16Cloner() : ModulePass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<CallGraph>(); + } + virtual bool runOnModule(Module &M); + + private: // Functions + // Mark reachable functions for the MainLine or InterruptLine. + void markCallGraph(CallGraphNode *CGN, string StringMark); + + // Clone auto variables of function specified. + void CloneAutos(Function *F); + + // Clone the body of a function. + Function *cloneFunction(Function *F); + + // Clone all shared functions. + void cloneSharedFunctions(CallGraphNode *isrCGN); + + // Remap all call sites to the shared function. + void remapAllSites(Function *Caller, Function *OrgF, Function *Clone); + + // Error reporting for PIC16Pass + void reportError(string ErrorString, vector<string> &Values); + void reportError(string ErrorString); + + private: //data + // Records if the interrupt function has already been found. + // If more than one interrupt function is found then an error + // should be thrown. + bool foundISR; + + // This ValueMap maps the auto variables of the original functions with + // the corresponding cloned auto variable of the cloned function. + // This value map is passed during the function cloning so that all the + // uses of auto variables be updated properly. + DenseMap<const Value*, Value*> ValueMap; + + // Map of a already cloned functions. + map<Function *, Function *> ClonedFunctionMap; + typedef map<Function *, Function *>::iterator cloned_map_iterator; + }; +} // End of anonymous namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp index 197c398..5ecb6aa 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -24,27 +24,27 @@ using namespace llvm; namespace llvm { - char PIC16FrameOverlay::ID = 0; - ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); } + char PIC16Overlay::ID = 0; + ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); } } -void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const { +void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<CallGraph>(); } -void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { +void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { // Do not set any color for external calling node. if (Depth != 0 && CGN->getFunction()) { unsigned Color = getColor(CGN->getFunction()); // Handle indirectly called functions - if (Color >= PIC16Overlay::StartIndirectCallColor || - Depth >= PIC16Overlay::StartIndirectCallColor) { + if (Color >= PIC16OVERLAY::StartIndirectCallColor || + Depth >= PIC16OVERLAY::StartIndirectCallColor) { // All functions called from an indirectly called function are given // an unique color. - if (Color < PIC16Overlay::StartIndirectCallColor && - Depth >= PIC16Overlay::StartIndirectCallColor) + if (Color < PIC16OVERLAY::StartIndirectCallColor && + Depth >= PIC16OVERLAY::StartIndirectCallColor) setColor(CGN->getFunction(), Depth); for (unsigned int i = 0; i < CGN->size(); i++) @@ -65,7 +65,7 @@ void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { DFSTraverse((*CGN)[i], Depth+1); } -unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, +unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth) { Function *Fn = CGN->getFunction(); @@ -81,7 +81,7 @@ unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, return Depth; } -void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { +void PIC16Overlay::setColor(Function *Fn, unsigned Color) { std::string Section = ""; if (Fn->hasSection()) Section = Fn->getSection(); @@ -119,7 +119,7 @@ void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { Fn->setSection(Section); } -unsigned PIC16FrameOverlay::getColor(Function *Fn) { +unsigned PIC16Overlay::getColor(Function *Fn) { int Color = 0; if (!Fn->hasSection()) return 0; @@ -150,7 +150,7 @@ unsigned PIC16FrameOverlay::getColor(Function *Fn) { return Color; } -bool PIC16FrameOverlay::runOnModule(Module &M) { +bool PIC16Overlay::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); CallGraphNode *ECN = CG.getExternalCallingNode(); @@ -164,7 +164,7 @@ bool PIC16FrameOverlay::runOnModule(Module &M) { return false; } -void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) { +void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) { // If the use of a function is not a call instruction then this // function might be called indirectly. In that case give it // an unique color. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h index d70c4e7..5a2551f 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -1,4 +1,4 @@ -//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// +//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,30 +14,35 @@ #ifndef PIC16FRAMEOVERLAY_H #define PIC16FRAMEOVERLAY_H -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Pass.h" -#include "llvm/CallGraphSCCPass.h" using std::string; using namespace llvm; namespace llvm { - namespace PIC16Overlay { + // Forward declarations. + class Function; + class Module; + class ModulePass; + class AnalysisUsage; + class CallGraphNode; + class CallGraph; + + namespace PIC16OVERLAY { enum OverlayConsts { StartInterruptColor = 200, StartIndirectCallColor = 300 }; } - class PIC16FrameOverlay : public ModulePass { + class PIC16Overlay : public ModulePass { std::string OverlayStr; unsigned InterruptDepth; unsigned IndirectCallColor; public: static char ID; // Class identification - PIC16FrameOverlay() : ModulePass(&ID) { + PIC16Overlay() : ModulePass(&ID) { OverlayStr = "Overlay="; - InterruptDepth = PIC16Overlay::StartInterruptColor; - IndirectCallColor = PIC16Overlay::StartIndirectCallColor; + InterruptDepth = PIC16OVERLAY::StartInterruptColor; + IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; } virtual void getAnalysisUsage(AnalysisUsage &AU) const; diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index d7cfe02..b891c18 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV, // Interface used by AsmPrinter to get a code section for a function. const PIC16Section * -PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { +PIC16TargetObjectFile::SectionForCode(const std::string &FnName, + bool isISR) const { const std::string &sec_name = PAN::getCodeSectionName(FnName); + // If it is ISR, its code section starts at a specific address. + if (isISR) + return getPIC16Section(sec_name, CODE, PAN::getISRAddr()); return getPIC16Section(sec_name, CODE); } diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 0b0ad43..cf8bf84 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -137,7 +137,8 @@ namespace llvm { /// Return a code section for a function. - const PIC16Section *SectionForCode (const std::string &FnName) const; + const PIC16Section *SectionForCode (const std::string &FnName, + bool isISR) const; /// Return a frame section for a function. const PIC16Section *SectionForFrame (const std::string &FnName) const; diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp index 46cc819..f1bdb12 100644 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; Target llvm::ThePIC16Target, llvm::TheCooperTarget; extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", + "PIC16 14-bit [experimental]"); RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index afc90b1..ac901d0 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -31,13 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c7ce171..155fba2 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[ // PowerPC System V Release 4 ABI //===----------------------------------------------------------------------===// -// _Complex arguments are never split, thus their two scalars are either -// passed both in argument registers or both on the stack. Also _Complex -// arguments are always passed in general purpose registers, never in -// Floating-point registers or vector registers. Arguments which should go -// on the stack are marked with the inreg parameter attribute. -// Giving inreg this target-dependent (and counter-intuitive) meaning -// simplifies things, because functions calls are not always coming from the -// frontend but are also created implicitly e.g. for libcalls. If inreg would -// actually mean that the argument is passed in a register, then all places -// which create function calls/function definitions implicitly would need to -// be aware of this fact and would need to mark arguments accordingly. With -// inreg meaning that the argument is passed on the stack, this is not an -// issue, except for calls which involve _Complex types. - def CC_PPC_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 3a15f7e..66dfd4b 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -257,7 +257,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { case PPC::STWX: case PPC::STWX8: case PPC::STWUX: case PPC::STW: case PPC::STW8: - case PPC::STWU: case PPC::STWU8: + case PPC::STWU: case PPC::STVEWX: case PPC::STFIWX: case PPC::STWBRX: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 004997f..9d79c0d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -156,10 +156,6 @@ namespace { SDValue BuildSDIVSequence(SDNode *N); SDValue BuildUDIVSequence(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - void InsertVRSaveCode(MachineFunction &MF); virtual const char *getPassName() const { @@ -184,14 +180,6 @@ private: }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PPCDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, /// check to see if we need to save/restore VRSAVE. If so, do it. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a11d624..3d81afa 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // If the global is weak or external, we have to go through the lazy // resolution stub. - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { @@ -1333,7 +1334,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. @@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8); + Op.getOperand(1), SV, 0, MVT::i8, + false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + false, false, 0); } @@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); + NULL, 0, + ObjSize==1 ? MVT::i8 : MVT::i16, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); @@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2271,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), - 0)); + 0, false, false, 0)); } } @@ -2297,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0); + PseudoSourceValue::getFixedStack(NewRetAddr), 0, + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. @@ -2308,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + PseudoSourceValue::getFixedStack(NewFPIdx), 0, + false, false, 0); } } return Chain; @@ -2346,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2395,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2862,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset)); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0)); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -3024,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - NULL, 0, VT); + NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3061,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3092,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3147,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3160,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3225,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + false, false, 0); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3300,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + false, false, 0); } @@ -3483,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3533,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); // FCFID it and return it. SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3578,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4249,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0); + Op.getOperand(0), FIdx, NULL, 0, + false, false, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -5460,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // to the stack. FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0); + DAG.getEntryNode(), RetAddrFI, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 219efb9..a0781b9 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -366,7 +366,7 @@ def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>; + [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; @@ -375,7 +375,7 @@ def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>; + [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; @@ -635,13 +635,6 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index af7d812..9895bea 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -19,6 +19,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -73,8 +74,7 @@ bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, destReg = MI.getOperand(0).getReg(); return true; } - } else if (oc == PPC::FMRS || oc == PPC::FMRD || - oc == PPC::FMRSD) { // fmr r1, r2 + } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2 assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && @@ -344,10 +344,9 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); } else if (DestRC == PPC::G8RCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::F4RCRegisterClass || + DestRC == PPC::F8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); } else if (DestRC == PPC::CRRCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); } else if (DestRC == PPC::VRRCRegisterClass) { @@ -421,22 +420,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else { - // FIXME: We use R0 here, because it isn't available for RA. We need to - // store the CR in the low 4-bits of the saved value. First, issue a MFCR - // to save all of the CRBits. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; - // rlwinm r0, r0, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31)); + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); } NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R0, + .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); } @@ -540,20 +547,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (RC == PPC::CRRCRegisterClass) { - // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0), - FrameIdx)); + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); } - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; @@ -672,33 +687,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, getUndefRegState(isUndef)), FrameIndex); } - } else if (Opc == PPC::FMRD) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMRS) { + } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { + // The register may be F4RC or F8RC, and that determines the memory op. + unsigned OrigReg = MI->getOperand(OpNum).getReg(); + // We cannot tell the register class from a physreg alone. + if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) + return NULL; + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); + const bool is64 = RC == PPC::F8RCRegisterClass; + if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? PPC::STFD : PPC::STFS)) .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)), @@ -707,7 +710,8 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? PPC::LFD : PPC::LFS)) .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | @@ -733,7 +737,7 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, else if ((Opc == PPC::OR8 && MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) return true; - else if (Opc == PPC::FMRD || Opc == PPC::FMRS) + else if (Opc == PPC::FMR || Opc == PPC::FMRSD) return true; return false; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 842f8ee..845cd8f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1019,20 +1019,16 @@ let Uses = [RM] in { } } -/// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending. +/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending. /// /// Note that these are defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) - PPC970_Unit_Pseudo; -def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F8RC:$frD, F8RC:$frB) - PPC970_Unit_Pseudo; +def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), + "fmr $frD, $frB", FPGeneral, + []>, // (set F4RC:$frD, F4RC:$frB) + PPC970_Unit_Pseudo; def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, [(set F8RC:$frD, (fextend F4RC:$frB))]>, @@ -1215,7 +1211,7 @@ def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>; + [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; @@ -1224,7 +1220,7 @@ def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>; + [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; @@ -1480,11 +1476,11 @@ def : Pat<(extloadf32 xaddr:$src), (FMRSD (LFSX xaddr:$src))>; // Memory barriers -def : Pat<(membarrier (i32 imm:$ll), - (i32 imm:$ls), - (i32 imm:$sl), - (i32 imm:$ss), - (i32 imm:$device)), +def : Pat<(membarrier (i32 imm /*ll*/), + (i32 imm /*ls*/), + (i32 imm /*sl*/), + (i32 imm /*ss*/), + (i32 imm /*device*/)), (SYNC)>; include "PPCInstrAltivec.td" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 20e77e7..0b509ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 049e893..1cb7340 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -287,10 +287,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32, GPRCClass::allocation_order_begin(const MachineFunction &MF) const { // 32-bit SVR4 ABI: r2 is reserved for the OS. // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: R2 is reserved for CR save/restore sequence. + return begin()+1; } GPRCClass::iterator GPRCClass::allocation_order_end(const MachineFunction &MF) const { @@ -325,10 +323,8 @@ def G8RC : RegisterClass<"PPC", [i64], 64, G8RCClass::iterator G8RCClass::allocation_order_begin(const MachineFunction &MF) const { // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: r2 is reserved for CR save/restore sequence. + return begin()+1; } G8RCClass::iterator G8RCClass::allocation_order_end(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 22eecd4..cac6962 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -115,32 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const { - if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index a654435..ac9ae2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -57,18 +57,6 @@ public: return InstrItins; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 8f265cf..3465779 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -889,9 +889,26 @@ entry: ; recognize a more elaborate tree than a simple SETxx. define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = sub double -0.000000e+00, %A ; <double> [#uses=1] + %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] ret double %E } +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone. This is currently +broken AFAIK, although it's not a target I can exercise. +The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 9a2ce6b..f6753a6 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -56,6 +56,9 @@ namespace { unsigned AsmVariant, const char *ExtraCode); bool printGetPCX(const MachineInstr *MI, unsigned OpNo); + + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const; }; } // end of anonymous namespace @@ -140,18 +143,19 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { break; } + unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); unsigned bbNum = MI->getParent()->getNumber(); - O << '\n' << ".LLGETPCH" << bbNum << ":\n"; - O << "\tcall\t.LLGETPC" << bbNum << '\n' ; + O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; + O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n' ; - O << ".LLGETPC" << bbNum << ":\n" ; + O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; O << "\tor\t" << operand - << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n'; O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; @@ -197,6 +201,39 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop + return I == Pred->end() || !I->getDesc().isBarrier(); +} + + + // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt index cc24abf..b4991fe 100644 --- a/lib/Target/Sparc/README.txt +++ b/lib/Target/Sparc/README.txt @@ -56,3 +56,4 @@ int %t1(int %a, int %b) { leaf fns. * Fill delay slots +* Implement JIT support diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index e1b3299..a7d1805 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -35,7 +35,6 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; SparcTargetMachine& TM; - MachineBasicBlock *CurBB; public: explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm), @@ -56,10 +55,6 @@ public: char ConstraintCode, std::vector<SDValue> &OutOps); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SPARC DAG->DAG Pattern Instruction Selection"; } @@ -72,17 +67,8 @@ private: }; } // end anonymous namespace -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SparcDAGToDAGISel::InstructionSelect() { - CurBB = BB; - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = CurBB->getParent(); + MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index e67002a..4e93ef0 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - NULL, 0, ObjectVT); + NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } InVals.push_back(Load); @@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); InVals.push_back(Load); } ArgOffset += 4; @@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue LoVal; @@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } // Compose the two halves together into an i64 unit. @@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + false, false, 0)); ArgOffset += 4; } @@ -339,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // FIXME: VERIFY THAT 68 IS RIGHT. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, + false, false, 0)); } #else @@ -385,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0); + Val, StackPtr, NULL, 0, + false, false, 0); // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. - SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); @@ -435,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0)); + PtrOff, NULL, 0, + false, false, 0)); } ArgOffset += ObjSize; } @@ -759,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -780,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -872,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(TLI.getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, + false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -882,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0); + VAListPtr, SV, 0, false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); + return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + false, false, 0); // Bit-Convert the value to f64. SDValue Ops[2] = { diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index e457235..56d8708 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -22,7 +22,7 @@ namespace llvm { unsigned GlobalBaseReg; public: SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f6f632d..8152e1d 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -100,8 +100,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SystemZ DAG->DAG Pattern Instruction Selection"; } @@ -152,10 +150,6 @@ namespace { bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM, bool is12Bit); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -594,41 +588,22 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsLegalToFold(N, P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SystemZDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} - SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; // Already selected. } @@ -694,9 +669,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. @@ -709,15 +682,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } case ISD::UDIVREM: { @@ -783,9 +750,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. @@ -797,15 +762,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } } @@ -813,14 +772,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "; + DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); errs() << "\n"; ); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index f7405a5..6f4b30f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -30,9 +30,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -337,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 @@ -435,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset)); + PseudoSourceValue::getStack(), Offset, + false, false, 0)); } } @@ -738,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 336e20e..f46840c 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -58,7 +58,7 @@ def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), []>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), "le\t{$dst, $src}", [(set FP32:$dst, (load rriaddr12:$src))]>; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 1891bba..a44f6d9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -257,7 +257,7 @@ def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src), [(set GR64:$dst, i64hi32:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV32rm : RXI<0x58, (outs GR32:$dst), (ins rriaddr12:$src), "l\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index e47d419..fd6e330 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -33,7 +33,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {} - SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {} + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : CalleeSavedFrameSize(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 295b30f..9a16808 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -580,7 +580,7 @@ const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, unsigned NumIndices) const { const Type *Ty = ptrTy; - assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()"); + assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); uint64_t Result = 0; generic_gep_type_iterator<Value* const*> diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57e..18b0fa4 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include <ctype.h> using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a231ebc..82619c7 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -19,17 +19,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { } /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a -/// pc-relative reference to the specified global variable from exception -/// handling information. In addition to the symbol, this returns -/// by-reference: -/// -/// IsIndirect - True if the returned symbol is actually a stub that contains -/// the address of the symbol, false if the symbol is the global itself. -/// -/// IsPCRel - True if the symbol reference is already pc-relative, false if -/// the caller needs to subtract off the address of the reference from the -/// symbol. -/// +/// reference to the specified global variable from exception +/// handling information. const MCExpr *TargetLoweringObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The generic implementation of this just returns a direct reference to the - // symbol. - IsIndirect = false; - IsPCRel = false; - + MachineModuleInfo *MMI, unsigned Encoding) const { // FIXME: Use GetGlobalValueSymbol. SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - return MCSymbolRefExpr::Create(Name.str(), getContext()); -} - - -//===----------------------------------------------------------------------===// -// ELF -//===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionELF *&Entry = Map[Section]; - if (Entry) return Entry; - - return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, - getContext()); -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); - - TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); - - DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Debug Info Sections. - DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - - -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { - if (Name.empty() || Name[0] != '.') return K; - - // Some lame default implementation based on some magic section names. - if (Name == ".bss" || - Name.startswith(".bss.") || - Name.startswith(".gnu.linkonce.b.") || - Name.startswith(".llvm.linkonce.b.") || - Name == ".sbss" || - Name.startswith(".sbss.") || - Name.startswith(".gnu.linkonce.sb.") || - Name.startswith(".llvm.linkonce.sb.")) - return SectionKind::getBSS(); - - if (Name == ".tdata" || - Name.startswith(".tdata.") || - Name.startswith(".gnu.linkonce.td.") || - Name.startswith(".llvm.linkonce.td.")) - return SectionKind::getThreadData(); - - if (Name == ".tbss" || - Name.startswith(".tbss.") || - Name.startswith(".gnu.linkonce.tb.") || - Name.startswith(".llvm.linkonce.tb.")) - return SectionKind::getThreadBSS(); - - return K; -} - - -static unsigned getELFSectionType(StringRef Name, SectionKind K) { - - if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; - - if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; - - if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; - - return MCSectionELF::SHT_PROGBITS; -} - - -static unsigned -getELFSectionFlags(SectionKind K) { - unsigned Flags = 0; - - if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; - - if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; - - if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; - - if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; - - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; - - if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; - - return Flags; -} - - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); - - // Infer section flags from the section name if we can. - Kind = getELFKindForNamedSection(SectionName, Kind); - - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + return getSymbolForDwarfReference(Sym, MMI, Encoding); } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name; - Name.append(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); - } - - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { - - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - - - std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const - } - - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. - if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - if (Kind.isReadOnly()) - return ReadOnlySection; - - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -//===----------------------------------------------------------------------===// -// MachO -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. - return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); - - CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. - SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); - - TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); - } else { - StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. - LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); - EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - - // Debug Information. - DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - // Parse the section specifier and create it if valid. - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); - if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; - } - - // Get the section. - const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); - - // Okay, now that we got the section, verify that the TAA & StubSize agree. - // If the user declared multiple globals with different section flags, we need - // to reject it here. - if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); - } - - return S; -} - -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); - - if (Kind.isText()) - return GV->isWeakForLinker() ? TextCoalSection : TextSection; - - // If this is weak/linkonce, put this in a coalescable section, either in text - // or data depending on if it is writable. - if (GV->isWeakForLinker()) { - if (Kind.isReadOnly()) - return ConstTextCoalSection; - return DataCoalSection; - } - - // FIXME: Alignment check should be handled by section classifier. - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { - if (TM.getTargetData()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) { - if (Kind.isMergeable1ByteCString()) - return CStringSection; - assert(Kind.isMergeable2ByteCString()); - return UStringSection; - } - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - } - - // Otherwise, if it is readonly, but not something we can specially optimize, - // just drop it in .const. - if (Kind.isReadOnly()) - return ReadOnlySection; - - // If this is marked const, put it into a const section. But if the dynamic - // linker needs to write to it, put it in the data segment. - if (Kind.isReadOnlyWithRel()) - return ConstDataSection; - - // Put zero initialized globals with strong external linkage in the - // DATA, __common section with the .zerofill directive. - if (Kind.isBSSExtern()) - return DataCommonSection; - - // Put zero initialized globals with local linkage in __DATA,__bss directive - // with the .zerofill directive (aka .lcomm). - if (Kind.isBSSLocal()) - return DataBSSSection; - - // Otherwise, just drop the variable in the normal data section. - return DataSection; -} - -const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { - // If this constant requires a relocation, we have to put it in the data - // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) - return ConstDataSection; - - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - return ReadOnlySection; // .const -} - -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. - SmallString<64> Name; - Mang->getNameWithPrefix(Name, GV, false); - if (Name[0] == 'L' || Name[0] == 'l') - return false; +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI, + unsigned Encoding) const { + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); + + switch (Encoding & 0xF0) { + default: + llvm_report_error("We do not support this DWARF encoding yet!"); + break; + case dwarf::DW_EH_PE_absptr: + // Do nothing special + break; + case dwarf::DW_EH_PE_pcrel: + // FIXME: PCSymbol + const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext()); + Res = MCBinaryExpr::CreateSub(Res, PC, getContext()); + break; } - return true; + return Res; } -const MCExpr *TargetLoweringObjectFileMachO:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - return MCSymbolRefExpr::Create(Name.str(), getContext()); +unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -//===----------------------------------------------------------------------===// -// COFF -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; +unsigned TargetLoweringObjectFile::getLSDAEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +unsigned TargetLoweringObjectFile::getFDEEncoding() const { + return dwarf::DW_EH_PE_absptr; } -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); - StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); - StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); - EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); - - // Debug info. - // FIXME: Don't use 'directive' mode here. - DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); - DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); - DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); - DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); - DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); -} - -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text$linkonce"; - if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getCOFFSection(Name.str(), false, Kind); - } - - if (Kind.isText()) - return getTextSection(); - - return getDataSection(); +unsigned TargetLoweringObjectFile::getTTypeEncoding() const { + return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk new file mode 100644 index 0000000..f5b8180 --- /dev/null +++ b/lib/Target/X86/Android.mk @@ -0,0 +1,47 @@ +LOCAL_PATH := $(call my-dir) + +# For the host only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterInfo.h.inc \ + X86GenRegisterNames.inc \ + X86GenRegisterInfo.inc \ + X86GenInstrNames.inc \ + X86GenInstrInfo.inc \ + X86GenAsmMatcher.inc \ + X86GenDAGISel.inc \ + X86GenDisassemblerTables.inc \ + X86GenFastISel.inc \ + X86GenCallingConv.inc \ + X86GenSubtarget.inc \ + X86GenEDInfo.inc + +LOCAL_SRC_FILES := \ + X86AsmBackend.cpp \ + X86COFFMachineModuleInfo.cpp \ + X86CodeEmitter.cpp \ + X86ELFWriterInfo.cpp \ + X86FastISel.cpp \ + X86FloatingPoint.cpp \ + X86FloatingPointRegKill.cpp \ + X86ISelDAGToDAG.cpp \ + X86ISelLowering.cpp \ + X86InstrInfo.cpp \ + X86JITInfo.cpp \ + X86MCAsmInfo.cpp \ + X86MCCodeEmitter.cpp \ + X86MCTargetExpr.cpp \ + X86RegisterInfo.cpp \ + X86Subtarget.cpp \ + X86TargetMachine.cpp \ + X86TargetObjectFile.cpp + +LOCAL_MODULE:= libLLVMX86CodeGen + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index acf497a..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -183,6 +184,14 @@ struct X86Operand : public MCParsedAsmOperand { bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -190,13 +199,13 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -204,7 +213,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); } @@ -218,7 +227,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -492,24 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // FIXME: Hack to recognize "sal..." for now. We need a way to represent - // alternative syntaxes in the .td file, without requiring instruction - // duplication. - if (Name.startswith("sal")) { - std::string Tmp = "shl" + Name.substr(3).str(); - Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc)); - } else { - // FIXME: This is a hack. We eventually want to add a general pattern - // mechanism to be used in the table gen file for these assembly names that - // use the same opcodes. Also we should only allow the "alternate names" - // for rep and repne with the instructions they can only appear with. - StringRef PatchedName = Name; - if (Name == "repe" || Name == "repz") - PatchedName = "rep"; - else if (Name == "repnz") - PatchedName = "repne"; - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); - } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. + StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 38ccbf9..734a545 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -25,10 +25,15 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -68,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '$' << Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%X\n", Op.getImm()); + *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 304306d..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -33,10 +33,11 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" @@ -52,40 +53,42 @@ void X86AsmPrinter::PrintPICBaseSymbol() const { OutContext); } +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); + + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); + + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); + + } + + return Symb; +} + /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - - // COFF and Cygwin specific mangling stuff. This should be moved out to the - // mangler or handled some other way? - if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. - const Function *F = MF.getFunction(); - CallingConv::ID CC = F->getCallingConv(); - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - if (Subtarget->isTargetCygMing()) { + if (Subtarget->isTargetCOFF()) { const Function *F = MF.getFunction(); - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext,F,*TM.getTargetData()); - - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; } - + // Have common code print out the function header with linkage info etc. EmitFunctionHeader(); - + // Emit the rest of the function body. EmitFunctionBody(); @@ -119,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -585,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -607,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 1d32a5f..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -61,8 +61,7 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI); void printSymbolOperand(const MachineOperand &MO); - - + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4274d0a..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,10 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 61f26a7..eed3b45 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 19eb05e..e5f84e8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit [ %tmp.34.i18, %no_exit.i7 ] %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 + %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 + %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 Compute_Tree.exit23: ; preds = %no_exit.i7 @@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch: double %X(double %Y, double %Z, double %A, double %B) { %C = setlt double %A, %B - %z = add double %Z, 0.0 ;; select operand is not a load + %z = fadd double %Z, 0.0 ;; select operand is not a load %D = select bool %C, double %Y, double %z ret double %D } @@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider: define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { entry: - %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1] + %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..d4545a6 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. //===---------------------------------------------------------------------===// -Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 / -FR64 to VR128. - -//===---------------------------------------------------------------------===// - Adding to the list of cmp / test poor codegen issues: int test(__m128 *A, __m128 *B) { @@ -1868,3 +1863,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. //===---------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +are functionally identical. + +//===---------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Android.mk b/lib/Target/X86/TargetInfo/Android.mk new file mode 100644 index 0000000..ee53f0d --- /dev/null +++ b/lib/Target/X86/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterNames.inc \ + X86GenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. + +LOCAL_SRC_FILES := \ + X86TargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. + +LOCAL_MODULE:= libLLVMX86Info + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1d17a05..ba0ee6c 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -19,12 +19,15 @@ namespace llvm { -class X86TargetMachine; class FunctionPass; -class MachineCodeEmitter; -class MCCodeEmitter; class JITCodeEmitter; +class MCAssembler; +class MCCodeEmitter; +class MCContext; +class MachineCodeEmitter; class Target; +class TargetAsmBackend; +class X86TargetMachine; class formatted_raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a @@ -49,9 +52,13 @@ FunctionPass *createX87FPRegKillInserterPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createHeinousX86MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); + +TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &); +TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp new file mode 100644 index 0000000..e6654ef --- /dev/null +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -0,0 +1,34 @@ +//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +#include "X86.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { + +class X86AsmBackend : public TargetAsmBackend { +public: + X86AsmBackend(const Target &T, MCAssembler &A) + : TargetAsmBackend(T) {} +}; + +} + +TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} + +TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. - if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. - // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. - typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f0bceb1..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -387,10 +387,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { @@ -401,7 +407,6 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, return; } - unsigned BaseRegNo = getX86RegNum(BaseReg); // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -757,27 +762,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; @@ -853,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -864,335 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! - -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - const MCInst *CurrentInst; - SmallVectorImpl<MCFixup> *FixupList; - -public: - MCSingleInstructionCodeEmitter() { reset(0, 0); } - - void reset(const MCInst *Inst, SmallVectorImpl<MCFixup> *Fixups) { - CurrentInst = Inst; - FixupList = Fixups; - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { - unsigned Offset = 0, OpIndex = 0, Kind = MR.getRelocationType(); - - // This form is only used in one case, for branches. - if (MR.isBasicBlock()) { - Offset = unsigned(MR.getMachineCodeOffset()); - OpIndex = 0; - } else { - assert(MR.isJumpTableIndex() && "Unexpected relocation!"); - - Offset = unsigned(MR.getMachineCodeOffset()); - - // The operand index is encoded as the first byte of the fake operand. - OpIndex = MR.getJumpTableIndex(); - } - - MCOperand Op = CurrentInst->getOperand(OpIndex); - assert(Op.isExpr() && "FIXME: Not yet implemented!"); - FixupList->push_back(MCFixup::Create(Offset, Op.getExpr(), - MCFixupKind(FirstTargetFixupKind + Kind))); - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} - - // Interface functions which should never get called in our usage. - - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "Unexpected code emitter call!"); - } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. - const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM, 0); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - unsigned getNumFixupKinds() const { - return 5; - } - - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } - }; - - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // Fake this as an external symbol to the code emitter to add a relcoation - // entry we will recognize. - Instr->addOperand(MachineOperand::CreateJTI(Start, 0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - bool AddTied = false; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) - AddTied = true; - else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. - if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(&MI, &Fixups); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -#include "llvm/Support/CommandLine.h" - -static cl::opt<bool> EnableNewEncoder("enable-new-x86-encoder", - cl::ReallyHidden); - - -// Ok, now you can look. -MCCodeEmitter *llvm::createHeinousX86MCCodeEmitter(const Target &T, - TargetMachine &TM) { - - // FIXME: Remove the heinous one when the new one works. - if (EnableNewEncoder) { - if (TM.getTargetData()->getPointerSize() == 4) - return createX86_32MCCodeEmitter(T, TM); - return createX86_64MCCodeEmitter(T, TM); - } - - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ea398e9..98e3f4e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } case Instruction::GetElementPtr: { + X86AddressMode SavedAM = AM; + // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); + if (X86SelectAddress(U->getOperand(0), AM)) + return true; + + // If we couldn't merge the sub value into this addr mode, revert back to + // our address and just match the value instead of completely failing. + AM = SavedAM; + break; unsupported_gep: // Ok, the GEP indices weren't all covered. break; @@ -786,8 +794,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -828,30 +836,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ -869,7 +877,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +931,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +957,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +984,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1160,6 +1169,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!X86SelectAddress(DI->getAddress(), AM)) return false; const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + // FIXME may need to add RegState::Debug to any registers produced, + // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). addMetadata(DI->getVariable()); return true; diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e44ce421..3fad8ad 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,15 +12,6 @@ // //===----------------------------------------------------------------------===// -// Force NDEBUG on in any optimized build on Darwin. -// -// FIXME: This is a huge hack, to work around ridiculously awful compile times -// on this file with gcc-4.2 on Darwin, in Release mode. -#if (!defined(__llvm__) && defined(__APPLE__) && \ - defined(__OPTIMIZE__) && !defined(NDEBUG)) -#define NDEBUG -#endif - #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -177,14 +168,11 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual void PreprocessISelDAG(); // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -208,18 +196,17 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, SDValue &OutChain); + SDValue &NodeWithChain); + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - void PreprocessForRMW(); - void PreprocessForFPConvert(); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -295,19 +282,22 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,60 +344,9 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) { - if (!N.hasOneUse()) - return false; - N = N.getOperand(0); } - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; - } - return false; + return true; } /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain @@ -473,51 +412,15 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -537,85 +440,23 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - checkForCycles(I); - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -652,9 +493,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -670,30 +512,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. -void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -1300,22 +1118,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { - LoadSDNode *LD = cast<LoadSDNode>(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1327,13 +1147,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1407,7 +1228,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1434,11 +1254,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1541,7 +1362,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1549,7 +1370,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1564,7 +1385,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1572,7 +1393,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1588,7 +1409,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1596,7 +1417,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1652,8 +1473,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1693,24 +1514,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1806,13 +1613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1835,19 +1636,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1962,13 +1753,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1992,19 +1777,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2117,17 +1891,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 515bc84..802bedc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -73,15 +73,16 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { case X86Subtarget::isDarwin: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_MachoTargetObjectFile(); - return new X8632_MachoTargetObjectFile(); + return new TargetLoweringObjectFileMachO(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -1001,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1411,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1476,7 +1452,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } } @@ -1498,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1573,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1668,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1737,7 +1712,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call @@ -1752,7 +1728,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1767,11 +1743,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } @@ -1882,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -2013,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2256,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2272,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -2397,7 +2382,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + false, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3592,7 +3577,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle. V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4836,8 +4822,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc; + if (VT == MVT::v8i16) + Opc = X86ISD::PINSRW; + else if (VT == MVT::v4i16) + Opc = X86ISD::MMX_PINSRW; + else if (VT == MVT::v16i8) + Opc = X86ISD::PINSRB; + else + Opc = X86ISD::PINSRB; + // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4888,7 +4882,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); + return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, + dl, VT, N0, N1, N2); } return SDValue(); } @@ -5091,7 +5086,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5171,7 +5166,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -5196,7 +5191,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. @@ -5264,7 +5259,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, + SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; @@ -5313,7 +5308,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5348,7 +5344,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5421,12 +5418,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5513,9 +5510,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5563,7 +5560,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5597,7 +5595,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5607,7 +5605,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5632,8 +5630,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5659,8 +5657,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5708,8 +5706,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5737,8 +5735,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5890,26 +5888,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. -static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5959,6 +5962,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. + if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) @@ -6400,24 +6418,13 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, - DAG.getTargetExternalSymbol("_alloca", IntPtr), - DAG.getRegister(X86::EAX, IntPtr), - DAG.getRegister(X86StackPtr, SPTy), - Flag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5); - Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), - Flag); + Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -6461,8 +6468,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -6646,7 +6652,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6658,8 +6665,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6667,21 +6674,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6967,13 +6976,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6985,7 +6994,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -7009,7 +7019,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7044,11 +7054,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -7056,24 +7067,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7133,21 +7145,23 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -7190,7 +7204,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7554,7 +7569,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. - Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7572,14 +7588,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7677,6 +7685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -7721,6 +7730,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } } @@ -7778,13 +7788,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -7794,12 +7804,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { unsigned NumBits2 = VT2.getSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7955,7 +7965,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8112,7 +8122,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8215,7 +8225,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8297,7 +8307,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8390,6 +8400,29 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + // The lowering is pretty easy: we're just emitting the call to _alloca. The + // non-trivial part is impdef of ESP. + // FIXME: The code should be tweaked as soon as we'll try to do codegen for + // mingw-w64. + + BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca") + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, @@ -8397,6 +8430,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::MINGW_ALLOCA: + return EmitLoweredMingwAlloca(MI, BB, EM); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8783,10 +8818,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -8806,10 +8842,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(2); // If we have SSE[12] support, try to form min/max nodes. SSE min/max - // instructions have the peculiarity that if either operand is a NaN, - // they chose what we call the RHS operand (and as such are not symmetric). - // It happens that this matches the semantics of the common C idiom - // x<y?x:y and related forms, so we can recognize these cases. + // instructions match the semantics of the common C idiom x<y?x:y but not + // x<=y?x:y, because of how they handle negative zero (which can be + // ignored in unsafe-math mode). if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { @@ -8817,36 +8852,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode = 0; // Check for x CC y ? x : y. - if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) + break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -8855,32 +8888,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETOGE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8889,36 +8919,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } // Check for x CC y ? y : x -- a min/max with reversed arms. - } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle NaNs incorrectly. + if (!UnsafeFPMath && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8927,32 +8954,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETULT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle NaNs incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -9177,10 +9200,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -9319,7 +9338,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9505,7 +9524,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9514,7 +9533,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9524,10 +9544,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9544,11 +9565,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9731,7 +9754,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9780,17 +9803,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 193ef05..4c12fcc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -180,7 +180,7 @@ namespace llvm { /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. - PINSRW, + PINSRW, MMX_PINSRW, /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, @@ -249,6 +249,9 @@ namespace llvm { // with control flow. VASTART_SAVE_XMM_REGS, + // MINGW_ALLOCA - MingW's __alloca call to do stack probing. + MINGW_ALLOCA, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -259,6 +262,10 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be + // thought as target memory ops! }; } @@ -639,7 +646,6 @@ namespace llvm { int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -790,7 +796,11 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; - + + MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 468dd67..8462255 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", // Pattern fragments. // -def i64immSExt8 : PatLeaf<(i64 imm), [{ - // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i64immSExt8 : PatLeaf<(i64 immSext8)>; + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); }]>; def i64immSExt32 : PatLeaf<(i64 imm), [{ @@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); }]>; + def i64immZExt32 : PatLeaf<(i64 imm), [{ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit // unsignedsign extended field. @@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; @@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; } // Uses = [EFLAGS] @@ -893,35 +895,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1771,7 +1776,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; @@ -1978,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), - imm:$imm), + (i32 (GetLo32XForm imm:$imm))), x86_subreg_32bit)>; // r & (2^32-1) ==> movz @@ -2102,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(shl GR64:$src1, (and CL, 63)), (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(srl GR64:$src1, (and CL, 63)), (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), +def : Pat<(sra GR64:$src1, (and CL, 63)), (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; // Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e22a903..ae24bfb 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; -let isReMaterializable = 1, mayHaveSideEffects = 1 in +let isReMaterializable = 1 in def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8d13c0f..39bda04 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, @@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, - { X86::MOVSDrr, X86::MOVSDrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, - { X86::MOVSSrr, X86::MOVSSrm, 0 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, @@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOVSSrr: - case X86::MOVSDrr: // FP Stack register class copies case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: case X86::MOV_Fp3264: case X86::MOV_Fp3280: case X86::MOV_Fp6432: case X86::MOV_Fp8032: - + + // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 + // copies are done with FsMOVAPSrr and FsMOVAPDrr. + case X86::FsMOVAPSrr: case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: case X86::MOVDQArr: - case X86::MOVSS2PSrr: - case X86::MOVSD2PDrr: - case X86::MOVPS2SSrr: - case X86::MOVPD2SDrr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && @@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; } Clone = false; } @@ -1587,44 +1577,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1694,7 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. - if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1778,7 +1768,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1804,7 +1794,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1814,16 +1804,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1834,7 +1824,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, CommonRC = SrcRC; else if (!DestRC->hasSubClass(SrcRC)) { // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy then as GR64. Similarly, for GR32_NOREX and + // but we want to copy them as GR64. Similarly, for GR32_NOREX and // GR32_NOSP, copy as GR32. if (SrcRC->hasSuperClass(&X86::GR64RegClass) && DestRC->hasSuperClass(&X86::GR64RegClass)) @@ -3556,6 +3546,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index a6b3863..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -268,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -331,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -396,15 +410,37 @@ namespace X86II { return TSFlags >> X86II::OpcodeShift; } + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(unsigned TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; } } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f0b4239..8a6ff54 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; -def SDTX86RdTsc : SDTypeProfile<0, 0, []>; +def SDTX86Void : SDTypeProfile<0, 0, []>; def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -143,7 +143,7 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, SDNPMayLoad]>; -def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, +def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; @@ -178,6 +178,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; +def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -343,18 +346,37 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def i16immSExt8 : PatLeaf<(i16 imm), [{ - // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit - // sign extended field. - return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def immSext8 : PatLeaf<(imm), [{ + return N->getSExtValue() == (int8_t)N->getSExtValue(); }]>; -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. - return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i16immSExt8 : PatLeaf<(i16 immSext8)>; +def i32immSExt8 : PatLeaf<(i32 immSext8)>; + +/// Load patterns: these constraint the match to the right address space. +def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; }]>; +def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 256; + return false; +}]>; + +def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 257; + return false; +}]>; + + // Helper fragments for loads. // It's always safe to treat a anyext i16 load as a i32 load if the i16 is // known to be 32-bit aligned or better. Ditto for i8 to i16. @@ -372,8 +394,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ return false; }]>; -def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), -[{ +def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{ LoadSDNode *LD = cast<LoadSDNode>(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) @@ -399,72 +420,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ return false; }]>; -def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ - LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - if (LD->isVolatile()) - return false; - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) - return true; - if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 4; - return false; -}]>; - -def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 256; - return false; -}]>; - -def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 257; - return false; -}]>; - -def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; - -def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; @@ -562,7 +522,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), } // x86-64 va_start lowering magic. -let usesCustomInserter = 1 in +let usesCustomInserter = 1 in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -573,6 +533,19 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, imm:$regsavefi, imm:$offset)]>; +// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls +// to _alloca is needed to probe the stack when allocating more than 4k bytes in +// one go. Touching the stack at 4K increments is necessary to ensure that the +// guard pages used by the OS virtual memory manager are allocated in correct +// sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. + +def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86MingwAlloca)]>; +} + // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; @@ -596,7 +569,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. @@ -614,16 +587,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? +let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -644,63 +647,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -721,7 +667,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -761,8 +707,10 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops), +// FIXME: The should be pseudo instructions that are lowered when going to +// mcinst. +let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -929,6 +877,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -1059,7 +1010,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))]>; @@ -1093,7 +1044,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in + canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; @@ -1115,7 +1066,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), // // Extra precision multiplication -let Defs = [AL,AH,EFLAGS], Uses = [AL] in + +// AL is really implied by AX, by the registers in Defs must match the +// SDNode results (i8, i32). +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1133,7 +1087,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1155,7 +1109,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1165,7 +1119,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1178,7 +1132,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1188,7 +1142,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1201,7 +1155,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1211,7 +1165,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -2328,98 +2282,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -4100,7 +4056,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4262,17 +4218,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4294,7 +4250,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -4462,12 +4418,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; -// (and (i32 load), 255) -> (zextload i8) -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), - (MOVZX32rm8 addr:$src)>; -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))), - (MOVZX32rm16 addr:$src)>; - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// @@ -4563,43 +4513,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; // (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR8:$src1, (and CL, 31)), (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR16:$src1, (and CL, 31)), (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR32:$src1, (and CL, 31)), (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHL16mCL addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHL32mCL addr:$dst)>; -def : Pat<(srl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR8:$src1, (and CL, 31)), (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR16:$src1, (and CL, 31)), (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR32:$src1, (and CL, 31)), (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHR32mCL addr:$dst)>; -def : Pat<(sra GR8:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR8:$src1, (and CL, 31)), (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR16:$src1, (and CL, 31)), (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR32:$src1, (and CL, 31)), (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) @@ -4620,11 +4570,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHRD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) @@ -4645,11 +4595,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHLD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) @@ -4670,11 +4620,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHRD16mrCL addr:$dst, GR16:$src2)>; -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) @@ -4695,11 +4645,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (anyext (setcc_carry)) -> (setcc_carry) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 89f020c..c8e0723 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; @@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), // Extract / Insert -def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>; -def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; +def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; + def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, @@ -597,13 +599,6 @@ let AddedComplexity = 10 in { (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } -// Patterns to perform vector shuffling with a zeroed out vector. -let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), - (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; -} - // Some special case PANDN patterns. // FIXME: Get rid of these. def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e26c979..2743dba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { // SSE1 Instructions //===----------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movss is not used for FR32 +// register copies because it's a partial register update; FsMOVAPSrr is +// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movss for copies. +let Constraints = "$src1 = $dst" in +def MOVSSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; + +// Extract the low 32-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr VR128:$src1, + (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (loadf32 addr:$src))]>; + +// MOVSSrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. +let AddedComplexity = 20 in { +def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +} + +// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), "cvttss2si\t{$src, $dst|$dst, $src}", @@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; @@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -762,6 +826,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -793,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), let AddedComplexity = 20 in { def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; } @@ -1010,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; // Non-temporal stores -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQ_64mr VR128:$src, addr:$dst)>; + +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; +} + // Load, store, and memory fence def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; @@ -1032,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; -let Predicates = [HasSSE1] in { - def : Pat<(v2i64 immAllZerosV), (V_SET0)>; - def : Pat<(v8i16 immAllZerosV), (V_SET0)>; - def : Pat<(v16i8 immAllZerosV), (V_SET0)>; - def : Pat<(v2f64 immAllZerosV), (V_SET0)>; - def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -} - -// FR32 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector FR32:$src)))]>; -def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; - -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (vector_extract (v4f32 VR128:$src), - (iPTR 0)))]>; -def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store (f32 (vector_extract (v4f32 VR128:$src), - (iPTR 0))), addr:$dst)]>; - - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. -let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in - def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; -} +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in -def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; - -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (MOVZSS2PSrm addr:$src)>; +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions //===---------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movsd is not used for FR64 +// register copies because it's a partial register update; FsMOVAPDrr is +// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movsd for copies. +let Constraints = "$src1 = $dst" in +def MOVSDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>; + +// Extract the low 64-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, + (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (loadf64 addr:$src))]>; + +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. +let AddedComplexity = 20 in { +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +} + +// Store scalar value to memory. def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), "cvttsd2si\t{$src, $dst|$dst, $src}", @@ -1163,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), Requires<[HasSSE2, OptForSize]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; // Match intrinsics which expect XMM operand(s). def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -1282,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1480,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; @@ -2295,34 +2375,47 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; // Non-temporal stores -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), + (MOVNTDQmr VR128:$src, addr:$dst)>; +} + // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. @@ -2334,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -// FR64 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector FR64:$src)))]>; -def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; - def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2373,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (vector_extract (v2f64 VR128:$src), - (iPTR 0)))]>; -def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2403,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. -let Constraints = "$src1 = $dst" in { - let neverHasSideEffects = 1 in - def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; -} - // Store / copy lower 64-bits of a XMM register. def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in { -def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (X86vzmovl (v2f64 (scalar_to_vector - (loadf64 addr:$src))))))]>; - -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>; -} - // movd / movq to XMM register zero-extends let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2613,9 +2651,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -2986,13 +3024,15 @@ let Predicates = [HasSSE2] in { let AddedComplexity = 15 in { // Zeroing a VR128 then do a MOVS{S|D} to the lower bits. def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>; + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; } // Splat v2f64 / v2i64 @@ -3010,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)), // Special unary SHUFPSrri case. def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; let AddedComplexity = 5 in def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, @@ -3057,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKLPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLDQrr VR128:$src, VR128:$src)>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> @@ -3077,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKHPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHDQrr VR128:$src, VR128:$src)>; } let AddedComplexity = 20 in { @@ -3105,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, addr:$src2)>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; -// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) +// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; } // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; // Set lowest element and zero upper elements. let AddedComplexity = 15 in @@ -3185,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), // Use movaps / movups for SSE integer load / store (one byte shorter). def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// // SSE4.1 Instructions @@ -3397,7 +3440,7 @@ let Constraints = "$src1 = $dst" in { (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, - (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize; + (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d3b0052..250634f 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit + // Use ## as a comment string so that .s files generated by llvm can go + // through the GCC preprocessor without causing an error. This is needed + // because "clang foo.s" runs the C preprocessor, which is usually reserved + // for .S files on other systems. Perhaps this is because the file system + // wasn't always case preserving or something. CommentString = "##"; PCSymbol = "."; @@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; + TextAlignFillValue = 0x90; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; @@ -94,27 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; + TextAlignFillValue = 0x90; } diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 764c87a..3f18696 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -14,53 +14,44 @@ #define DEBUG_TYPE "x86-emitter" #include "X86.h" #include "X86InstrInfo.h" +#include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This should move to a header. -namespace llvm { -namespace X86 { -enum Fixups { - reloc_pcrel_word = FirstTargetFixupKind, - reloc_picrel_word, - reloc_absolute_word, - reloc_absolute_word_sext, - reloc_absolute_dword -}; -} -} - namespace { class X86MCCodeEmitter : public MCCodeEmitter { X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT const TargetMachine &TM; const TargetInstrInfo &TII; + MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()) { + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 5; + return 3; } - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } @@ -83,9 +74,11 @@ public: } } - void EmitDisplacementField(const MCOperand &Disp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { @@ -106,8 +99,8 @@ public: void EmitMemModRMByte(const MCInst &MI, unsigned Op, - unsigned RegOpcodeField, intptr_t PCAdj, - unsigned &CurByte, raw_ostream &OS, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -119,13 +112,15 @@ public: MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, false); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); } MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, true); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); } @@ -135,36 +130,59 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + void X86MCCodeEmitter:: -EmitDisplacementField(const MCOperand &DispOp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (DispOp.isImm()) { - EmitConstant(DispOp.getImm(), 4, CurByte, OS); + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } -#if 0 - // Otherwise, this is something that requires a relocation. Emit it as such - // now. - unsigned RelocType = Is64BitMode ? - (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); -#endif + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, DispOp.getExpr(), - MCFixupKind(X86::reloc_absolute_word))); - EmitConstant(0, 4, CurByte, OS); + Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); } void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - intptr_t PCAdj, - unsigned &CurByte, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -172,31 +190,48 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - - // FIXME: Eliminate! - bool IsPCRel = false; + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + // Determine whether a SIB byte is needed. // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { - if (BaseReg == 0 || // [disp32] in X86-32 mode - BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, true, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - unsigned BaseRegNo = GetX86RegNum(Base); - // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -209,13 +244,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } @@ -270,9 +305,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -280,11 +315,11 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// size, and 3) use of X86-64 extended registers. static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, const TargetInstrDesc &Desc) { - unsigned REX = 0; - - // Pseudo instructions do not need REX prefix byte. + // Pseudo instructions never have a rex byte. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; + + unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; @@ -482,52 +517,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; - assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::RawFrm: { + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); - - if (CurOp == NumOps) - break; - - assert(0 && "Unimpl RawFrm expr"); break; - } - case X86II::AddRegFrm: { + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - unsigned Size = X86II::getSizeOfImm(TSFlags); - EmitConstant(MO1.getImm(), Size, CurByte, OS); - break; - } - - assert(0 && "Unimpl AddRegFrm expr"); break; - } case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), - 0, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcReg: @@ -535,9 +547,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcMem: { @@ -551,117 +560,78 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else AddrOperands = X86AddrNumOperands; - // FIXME: What is this actually doing? - intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86II::getSizeOfImm(TSFlags) : 0; - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: { + case X86II::MRM6r: case X86II::MRM7r: EmitByte(BaseOpcode, CurByte, OS); - - // Special handling of lfence, mfence, monitor, and mwait. - // FIXME: This is terrible, they should get proper encoding bits in TSFlags. - if (Opcode == X86::LFENCE || Opcode == X86::MFENCE || - Opcode == X86::MONITOR || Opcode == X86::MWAIT) { - EmitByte(ModRMByte(3, (TSFlags & X86II::FormMask)-X86II::MRM0r, 0), - CurByte, OS); - - switch (Opcode) { - default: break; - case X86::MONITOR: EmitByte(0xC8, CurByte, OS); break; - case X86::MWAIT: EmitByte(0xC9, CurByte, OS); break; - } - } else { - EmitRegModRMByte(MI.getOperand(CurOp++), - (TSFlags & X86II::FormMask)-X86II::MRM0r, - CurByte, OS); - } - - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - EmitConstant(MO1.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo unimpl"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? - if (MO1.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO1, TM); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); break; -#endif - } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = 0; - if (CurOp + X86AddrNumOperands != NumOps) { - if (MI.getOperand(CurOp+X86AddrNumOperands).isImm()) - PCAdj = X86II::getSizeOfImm(TSFlags); - else - PCAdj = 4; - } - + case X86II::MRM6m: case X86II::MRM7m: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands; - - if (CurOp == NumOps) - break; - - const MCOperand &MO = MI.getOperand(CurOp++); - if (MO.isImm()) { - EmitConstant(MO.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo not handled"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? - if (MO.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO, TM); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); -#endif + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); break; } - } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); #ifndef NDEBUG // FIXME: Verify. diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index bb53bf1..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,10 +35,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; @@ -66,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -76,7 +65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -91,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 081c6d9..0f4ce37 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR32RegClass) { + return A; } break; case 2: @@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR64RegClass) { + return A; } break; case 3: @@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; + } else if (B == &X86::VR128RegClass) { + return A; } break; case 4: @@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); bool requiresRealignment = - RealignStack && (MFI->getMaxAlignment() > StackAlign); + RealignStack && ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. @@ -485,7 +493,7 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -627,10 +635,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1053,7 +1057,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1064,7 +1069,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8fb5e92..e4bdb4e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -35,7 +35,8 @@ namespace X86 { /// these indices must be kept in sync with the class indices in the /// X86RegisterInfo.td file. enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4 + SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, + SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 }; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 1559bf7..ed2ce6c 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -158,22 +158,22 @@ let Namespace = "X86" in { def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; // YMM Registers, used by AVX instructions - def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; - def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; - def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; - def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; - def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; - def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; - def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; - def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; - def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; - def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; - def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; - def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; - def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; - def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; - def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; - def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; + def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; + def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; + def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>; + def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>; + def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>; + def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>; + def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>; + def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>; + def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>; + def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>; + def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>; + def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>; + def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; + def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; + def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; def x86_subreg_16bit : PatLeaf<(i32 3)>; def x86_subreg_32bit : PatLeaf<(i32 4)>; +def x86_subreg_ss : PatLeaf<(i32 1)>; +def x86_subreg_sd : PatLeaf<(i32 2)>; +def x86_subreg_xmm : PatLeaf<(i32 3)>; + def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, @@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; +def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { + let SubRegClassList = [FR32, FR64]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, - YMM12, YMM13, YMM14, YMM15]>; + YMM12, YMM13, YMM14, YMM15]> { + let SubRegClassList = [FR32, FR64, VR128]; +} // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..594a470 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -20,9 +20,9 @@ namespace llvm { class GlobalValue; class TargetMachine; - + /// PICStyles - The X86 backend supports a number of different styles of PIC. -/// +/// namespace PICStyles { enum Style { StubPIC, // Used on i386-darwin in -fPIC mode. @@ -46,7 +46,7 @@ protected: /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; - + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or /// none supported. X86SSEEnum X86SSELevel; @@ -58,7 +58,7 @@ protected: /// HasCMov - True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; - + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -78,8 +78,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; - /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. - /// This may require setting a feature bit in the processor. + /// HasVectorUAMem - True if SIMD operations can have unaligned memory + /// operands. This may require setting a feature bit in the + /// processor. bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric @@ -150,20 +151,20 @@ public: bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } - + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } bool isTargetCygwin() const { return TargetType == isCygwin; } bool isTargetCygMing() const { return TargetType == isMingw || TargetType == isCygwin; } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { return TargetType == isMingw || TargetType == isCygwin || TargetType == isWindows; } - + bool isTargetWin64() const { return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } @@ -175,7 +176,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; @@ -196,11 +197,11 @@ public: bool isPICStyleStubAny() const { return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f835e29..56ddaf8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,11 +47,16 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - // FIXME: Remove the heinous one when the new one works. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createHeinousX86MCCodeEmitter); + createX86_32MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createHeinousX86MCCodeEmitter); + createX86_64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterAsmBackend(TheX86_64Target, + createX86_64AsmBackend); } @@ -201,32 +205,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index eee29be..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,18 +62,6 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index b8cef7d..29a0be5 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,61 +7,112 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetObjectFile.h" #include "X86MCTargetExpr.h" +#include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; -const MCExpr *X8632_MachoTargetObjectFile:: +const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); + MachineModuleInfo *MMI, unsigned Encoding) const { + + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which + // is an indirect pc-relative reference. + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } -const MCExpr *X8664_MachoTargetObjectFile:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - - // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which - // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - const MCSymbol *Sym = getContext().CreateSymbol(Name); - const MCExpr *Res = - X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; } +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0444417 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,13 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - - /// X8632_MachoTargetObjectFile - This TLOF implementation is used for - /// Darwin/x86-32. - class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { - public: - - virtual const MCExpr * - getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; - }; - + class X86TargetMachine; + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +24,31 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + } // end namespace llvm #endif diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index d18f55d..82e23a1 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" @@ -62,6 +63,11 @@ namespace { } void printMemOperand(const MachineInstr *MI, int opNum); + void printInlineJT(const MachineInstr *MI, int opNum, + const std::string &directive = ".jmptable"); + void printInlineJT32(const MachineInstr *MI, int opNum) { + printInlineJT(MI, opNum, ".jmptable32"); + } void printOperand(const MachineInstr *MI, int opNum); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); @@ -257,6 +263,23 @@ void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum) printOperand(MI, opNum+1); } +void XCoreAsmPrinter:: +printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive) +{ + unsigned JTI = MI->getOperand(opNum).getIndex(); + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + O << "\t" << directive << " "; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (i > 0) + O << ","; + O << *MBB->getSymbol(OutContext); + } +} + void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 383fd91..b1ab132 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -65,8 +65,6 @@ namespace { bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; } @@ -147,15 +145,6 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, return false; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void XCoreDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - CurDAG->RemoveDeadNodes(); -} - SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT NVT = N->getValueType(0); @@ -164,7 +153,11 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { default: break; case ISD::Constant: { if (Predicate_immMskBitp(N)) { - SDValue MskSize = Transform_msksize_xform(N); + // Transformation function: get the size of a mask + int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue(); + assert(isMask_32(MaskVal)); + // Look for the first non-zero bit + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index bf8c38f..e6515d8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" @@ -53,6 +54,8 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::RETSP : return "XCoreISD::RETSP"; case XCoreISD::LADD : return "XCoreISD::LADD"; case XCoreISD::LSUB : return "XCoreISD::LSUB"; + case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; + case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; default : return NULL; } } @@ -106,9 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::TRAP, MVT::Other, Legal); - // Expand jump tables for now - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // Jump tables. + setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); @@ -157,7 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -315,14 +317,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) { - // FIXME there isn't really debug info here + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + unsigned JTI = JT->getIndex(); + MachineFunction &MF = DAG.getMachineFunction(); + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + + unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size(); + if (NumEntries <= 32) { + return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index); + } + assert((NumEntries >> 31) == 0); + SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT, + ScaledIndex); } static bool @@ -390,7 +405,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) if (Offset % 4 == 0) { // We've managed to infer better alignment information than the load // already has. Use an aligned load. - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + // + // FIXME: No new alignment information is actually passed here. + // Should the offset really be 4? + // + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -407,9 +427,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4); + LowAddr, NULL, 4, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4); + HighAddr, NULL, 4, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); @@ -423,12 +443,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) int SVOffset = LD->getSrcValueOffset(); SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, - LD->isVolatile(), 2); + LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, - MVT::i16, LD->isVolatile(), 2); + MVT::i16, LD->isVolatile(), + LD->isNonTemporal(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); @@ -452,7 +473,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -487,12 +508,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getSrcValue(), SVOffset, MVT::i16, - ST->isVolatile(), 2); + ST->isVolatile(), ST->isNonTemporal(), + 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, ST->getSrcValue(), SVOffset + 2, - MVT::i16, ST->isVolatile(), 2); + MVT::i16, ST->isVolatile(), + ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -513,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); return CallResult.second; } @@ -561,15 +584,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0); + Node->getOperand(1), V, 0, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); + return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); } SDValue XCoreTargetLowering:: @@ -582,7 +606,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -877,7 +902,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -908,7 +934,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) @@ -1134,10 +1161,8 @@ static inline bool isImmUs4(int64_t val) bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - // Be conservative with void - // FIXME: Can we be more aggressive? if (Ty->getTypeID() == Type::VoidTyID) - return false; + return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); const TargetData *TD = TM.getTargetData(); unsigned Size = TD->getTypeAllocSize(Ty); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f7b620e..0c638af 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -28,7 +28,7 @@ namespace llvm { namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + FIRST_NUMBER = ISD::BUILTIN_OP_END, // Branch and link (call) BL, @@ -52,7 +52,13 @@ namespace llvm { LADD, // Corresponds to LSUB instruction - LSUB + LSUB, + + // Jumptable branch. + BR_JT, + + // Jumptable branch using long branches for each entry. + BR_JT32 }; } @@ -122,7 +128,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 5a54844..722e747 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -145,6 +145,11 @@ static inline bool IsCondBranch(unsigned BrOpc) { return IsBRF(BrOpc) || IsBRT(BrOpc); } +static inline bool IsBR_JT(unsigned BrOpc) { + return BrOpc == XCore::BR_JT + || BrOpc == XCore::BR_JT32; +} + /// GetCondFromBranchOpc - Return the XCore CC that matches /// the correspondent Branch instruction opcode. static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) @@ -271,6 +276,14 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return false; } + // Likewise if it ends with a branch table followed by an unconditional branch. + if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + // Otherwise, can't handle this. return true; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 10dc18c..46805d5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -34,6 +34,15 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; +def SDT_XCoreBR_JT : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT, + [SDNPHasChain]>; + +def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT, + [SDNPHasChain]>; + def SDT_XCoreAddress : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -185,6 +194,15 @@ def MEMii : Operand<i32> { let MIOperandInfo = (ops i32imm, i32imm); } +// Jump tables. +def InlineJT : Operand<i32> { + let PrintMethod = "printInlineJT"; +} + +def InlineJT32 : Operand<i32> { + let PrintMethod = "printInlineJT32"; +} + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// @@ -624,7 +642,7 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; // TODO extdp, kentsp, krestsp, blat, setsr // clrsr, getsr, kalli -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6< (outs), (ins brtarget:$target), @@ -756,24 +774,34 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), // One operand short // TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp -// bru, setdp, setcp, setv, setev, kcall +// setdp, setcp, setv, setev, kcall // dgetreg -let isBranch=1, isIndirectBranch=1, isTerminator=1 in +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), "bau $addr", [(brind GRRegs:$addr)]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>; + +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; + let Defs=[SP], neverHasSideEffects=1 in def SETSP_1r : _F1R<(outs), (ins GRRegs:$src), "set sp, $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src), "ecallt $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), "ecallf $src", []>; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 7efb990..7424c78 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -10,13 +10,12 @@ #ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H #define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); // TODO: Classify globals as xcore wishes. |