Diffstat (limited to 'lib/Target')
167 files changed, 1592 insertions, 1255 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index acb57f7..2a1e8e4 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -18,9 +18,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include <cassert> namespace llvm { diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 4ec19cc..ca30716 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "ARM.h" #include "ARMAsmPrinter.h" +#include "ARM.h" #include "ARMBuildAttrs.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 75b796e..366e2fa 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -935,6 +935,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } } else llvm_unreachable("Unknown reg class!"); @@ -953,6 +955,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); break; @@ -2756,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: - case ARM::VLD1DUPq8Pseudo: - case ARM::VLD1DUPq16Pseudo: - case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8PseudoWB_fixed: - case ARM::VLD1DUPq16PseudoWB_fixed: - case ARM::VLD1DUPq32PseudoWB_fixed: - case ARM::VLD1DUPq8PseudoWB_register: - case ARM::VLD1DUPq16PseudoWB_register: - case ARM::VLD1DUPq32PseudoWB_register: - case ARM::VLD2DUPd8Pseudo: - case ARM::VLD2DUPd16Pseudo: - case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8PseudoWB_fixed: - case ARM::VLD2DUPd16PseudoWB_fixed: - case ARM::VLD2DUPd32PseudoWB_fixed: - case ARM::VLD2DUPd8PseudoWB_register: - case ARM::VLD2DUPd16PseudoWB_register: - case ARM::VLD2DUPd32PseudoWB_register: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8wb_fixed: + case ARM::VLD2DUPd16wb_fixed: + case ARM::VLD2DUPd32wb_fixed: + case ARM::VLD2DUPd8wb_register: + case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_register: case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp 
index d2aff9a..291369f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" @@ -79,6 +79,7 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); + Reserved.set(ARM::FPSCR); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -492,8 +493,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the // emergency spill slot. - if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() || - !TFI->hasReservedCallFrame(MF))) + if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF)) return true; // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited @@ -517,7 +517,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { } bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We can't realign the stack if: @@ -532,8 +531,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(FramePtr)) return false; - // We may also need a base pointer if there are dynamic allocas. - if (!MFI->hasVarSizedObjects()) + // We may also need a base pointer if there are dynamic allocas or stack + // pointer adjustments around calls. + if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; if (!EnableBasePointer) return false; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 437b4c7..2b9c55d 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -15,13 +15,13 @@ #ifndef ARMCALLINGCONV_H #define ARMCALLINGCONV_H -#include "llvm/CallingConv.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "ARM.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { @@ -29,7 +29,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. 
if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -72,9 +72,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; - static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -118,8 +118,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c4ab99d..c2b7816 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -99,8 +99,8 @@ namespace { // Entries for NEON load/store information table. The table is sorted by // PseudoOpc for fast binary-search lookups. struct NEONLdStTableEntry { - unsigned PseudoOpc; - unsigned RealOpc; + uint16_t PseudoOpc; + uint16_t RealOpc; bool IsLoad; bool isUpdating; bool hasWritebackOperand; @@ -129,16 +129,6 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false}, -{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, -{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false}, - { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, @@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, -{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, -{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false}, -{ 
ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false}, -{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, -{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false}, - { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, @@ -345,7 +325,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = { /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON /// load or store pseudo instruction. static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { - unsigned NumEntries = array_lengthof(NEONLdStTable); + const unsigned NumEntries = array_lengthof(NEONLdStTable); #ifndef NDEBUG // Make sure the table is sorted. @@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: - case ARM::VLD1DUPq8Pseudo: - case ARM::VLD1DUPq16Pseudo: - case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8PseudoWB_fixed: - case ARM::VLD1DUPq16PseudoWB_fixed: - case ARM::VLD1DUPq32PseudoWB_fixed: - case ARM::VLD1DUPq8PseudoWB_register: - case ARM::VLD1DUPq16PseudoWB_register: - case ARM::VLD1DUPq32PseudoWB_register: - case ARM::VLD2DUPd8Pseudo: - case ARM::VLD2DUPd16Pseudo: - case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8PseudoWB_fixed: - case ARM::VLD2DUPd16PseudoWB_fixed: - case ARM::VLD2DUPd32PseudoWB_fixed: - case ARM::VLD2DUPd8PseudoWB_register: - case ARM::VLD2DUPd16PseudoWB_register: - case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 818b202..a24eab4 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1384,7 +1384,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); - if (Imm < 0) { + // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather + // then a cmn, because there is no way to represent 2147483648 as a + // signed 32-bit int. + if (Imm < 0 && Imm != (int)0x80000000) { isNegativeImm = true; Imm = -Imm; } @@ -1475,7 +1478,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool ARMFastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); - Type *Ty = CI->getOperand(0)->getType(); // Get the compare predicate. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); @@ -1495,11 +1497,10 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); - bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); - unsigned CondReg = isFloat ? 
ARM::FPSCR : ARM::CPSR; + // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) - .addImm(ARMPred).addReg(CondReg); + .addImm(ARMPred).addReg(ARM::CPSR); UpdateValueMap(I, DestReg); return true; @@ -1851,6 +1852,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + // Check that we can handle all of the arguments. If we can't, then bail out + // now before we add code to the MBB. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // We don't handle NEON/vector parameters yet. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) + return false; + + // Now copy/store arg to correct locations. + if (VA.isRegLoc() && !VA.needsCustom()) { + continue; + } else if (VA.needsCustom()) { + // TODO: We need custom lowering for vector (v2f64) args. + if (VA.getLocVT() != MVT::f64 || + // TODO: Only handle register args for now. + !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) + return false; + } else { + switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) { + default: + return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + case MVT::f32: + if (!Subtarget->hasVFP2()) + return false; + break; + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + break; + } + } + } + + // At the point, we are able to handle the call's arguments in fast isel. + // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); @@ -1866,9 +1909,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; - // We don't handle NEON/vector parameters yet. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; + assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && + "We don't handle NEON/vector parameters yet."); // Handle arg promotion, etc. switch (VA.getLocInfo()) { @@ -1908,12 +1950,13 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, RegArgs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { // TODO: We need custom lowering for vector (v2f64) args. - if (VA.getLocVT() != MVT::f64) return false; + assert(VA.getLocVT() == MVT::f64 && + "Custom lowering for v2f64 args not available"); CCValAssign &NextVA = ArgLocs[++i]; - // TODO: Only handle register args for now. - if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false; + assert(VA.isRegLoc() && NextVA.isRegLoc() && + "We only handle register args!"); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVRRD), VA.getLocReg()) @@ -1929,9 +1972,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, Addr.Base.Reg = ARM::SP; Addr.Offset = VA.getLocMemOffset(); - if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; + bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet; + assert(EmitRet && "Could not emit a store for argument!"); } } + return true; } @@ -2136,7 +2181,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); - if(isThumb2) + if (isThumb2) // Explicitly adding the predicate here. 
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0fd6025..bd4b2a9 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -501,7 +501,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, // SP can move around if there are allocas. We may also lose track of SP // when emergency spilling inside a non-reserved call frame setup. - bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF); + bool hasMovingSP = !hasReservedCallFrame(MF); // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c99db98..ffb9acb 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; - case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; - case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; - case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; + case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; + case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; + case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; } return Opc; // If not one we handle, return it unchanged. } @@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, - ARM::VLD2DUPd32Pseudo }; + unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, + ARM::VLD2DUPd32 }; return SelectVLDDup(N, false, 2, Opcodes); } @@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, - ARM::VLD2DUPd16PseudoWB_fixed, - ARM::VLD2DUPd32PseudoWB_fixed }; + unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, + ARM::VLD2DUPd32wb_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 477b5f4..e26dd22 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-isel" +#include "ARMISelLowering.h" #include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -49,7 +49,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <sstream> using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -87,7 +86,7 @@ namespace { } // The APCS parameter registers. 
-static const unsigned GPRArgRegs[] = { +static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -456,6 +455,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -3673,6 +3674,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal == -1) + return SDValue(); + + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); +} + /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. @@ -5109,6 +5131,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); @@ -6842,33 +6865,63 @@ static SDValue PerformMULCombine(SDNode *N, if (!C) return SDValue(); - uint64_t MulAmt = C->getZExtValue(); + int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); DebugLoc DL = N->getDebugLoc(); SDValue Res; MulAmt >>= ShiftAmt; - if (isPowerOf2_32(MulAmt - 1)) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - Res = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt-1), - MVT::i32))); - } else if (isPowerOf2_32(MulAmt + 1)) { - // (mul x, 2^N - 1) => (sub (shl x, N), x) - Res = DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt+1), - MVT::i32)), - V); - } else - return SDValue(); + + if (MulAmt >= 0) { + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt - 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt + 1), + MVT::i32)), + V); + } else + return SDValue(); + } else { + uint64_t MulAmtAbs = -MulAmt; + if (isPowerOf2_32(MulAmtAbs + 1)) { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + Res = DAG.getNode(ISD::SUB, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + 
DAG.getConstant(Log2_32(MulAmtAbs + 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmtAbs - 1)) { + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmtAbs-1), + MVT::i32))); + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, MVT::i32),Res); + + } else + return SDValue(); + } if (ShiftAmt != 0) - Res = DAG.getNode(ISD::SHL, DL, VT, Res, - DAG.getConstant(ShiftAmt, MVT::i32)); + Res = DAG.getNode(ISD::SHL, DL, VT, + Res, DAG.getConstant(ShiftAmt, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7f12293..a71b74e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -15,6 +15,7 @@ #ifndef ARMISELLOWERING_H #define ARMISELLOWERING_H +#include "ARM.h" #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -434,6 +435,8 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 7bedf30..72af535 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -14,11 +14,11 @@ #ifndef ARMINSTRUCTIONINFO_H #define ARMINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "ARM.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 0b1406e..8196582 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -637,6 +637,7 @@ def BitfieldAsmOperand : AsmOperandClass { let Name = "Bitfield"; let ParserMethod = "parseBitfield"; } + def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM::isBitFieldInvertedMask(N->getZExtValue()); @@ -4084,74 +4085,43 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -let isCodeGenOnly = 1 in { // Conditional instructions -multiclass AsI1_bincc_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - iii, opc, "\t$Rd, $Rn, $imm", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; - } - def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - iir, opc, "\t$Rd, $Rn, $Rm", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; - } - - def rsi : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, - iis, opc, "\t$Rd, $Rn, $shift", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let 
Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, - iis, opc, "\t$Rd, $Rn, $shift", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } -} // AsI1_bincc_irs - -defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, + Instruction irsr, + InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis> { + def ri : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rr : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsi : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; +} + +defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -} // isCodeGenOnly } // neverHasSideEffects + //===----------------------------------------------------------------------===// // Atomic operations intrinsics // @@ -4605,10 +4575,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -4642,10 +4618,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", + (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : 
ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", + (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -5252,6 +5234,20 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; +// Pre-v6, 'mov r0, r0' was used as a NOP encoding. +def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, + Requires<[IsARM, NoV6]>; + +// UMULL/SMULL are available on all arches, but the instruction definitions +// need difference constraints pre-v6. Use these aliases for the assembly +// parsing on pre-v6. +def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; + // 'it' blocks in ARM mode just validate the predicates. The IT itself // is discarded. def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8684ce1..f61eb2b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass { let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> { +def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> { let ParserMatchClass = VecListDPairAsmOperand; } // Register list of three sequential D registers. @@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass { let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> { +def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> { let ParserMatchClass = VecListDPairSpacedAsmOperand; } // Register list of three D registers spaced by 2 (three Q registers). @@ -153,23 +153,24 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { let ParserMatchClass = VecListOneDAllLanesAsmOperand; } // Register list of two D registers, with "all lanes" subscripting. -def VecListTwoDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListTwoDAllLanes"; +def VecListDPairAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { - let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +def VecListDPairAllLanes : RegisterOperand<DPair, + "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListDPairAllLanesAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). 
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListTwoQAllLanes"; +def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpacedAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoQAllLanes : RegisterOperand<DPR, +def VecListDPairSpacedAllLanes : RegisterOperand<DPair, "printVectorListTwoSpacedAllLanes"> { - let ParserMatchClass = VecListTwoQAllLanesAsmOperand; + let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; } // Register list of three D registers, with "all lanes" subscripting. def VecListThreeDAllLanesAsmOperand : AsmOperandClass { @@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> { - let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; -} - def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; -def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>; -def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>; -def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>; - def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32Pseudo addrmode6:$addr)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { - -class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), +class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { } multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; defm VLD1DUPq16wb : 
VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; -def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; -def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; - // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), @@ -1378,18 +1365,14 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; - -// ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; +// ...with double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // ...with address register writeback: multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { @@ -1414,20 +1397,13 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; - -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; -def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; -def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e8984e1..1f7edc1 
100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -574,7 +574,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, cc_out:$s)>; // and with the optional destination operand, too. - def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"), + def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"), (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; @@ -2952,45 +2952,36 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; +} // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<bits<4> opcod, string opc, +multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { // shifted imm - def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), - iii, opc, ".w\t$Rd, $Rn, $imm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{15} = 0; - } + def ri : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - iir, opc, ".w\t$Rd, $Rn, $Rm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type - } + def rr : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; // shifted register - def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - } + def rs : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + 4, iis, [], + (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; } // T2I_bincc_irs -defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; - -} // isCodeGenOnly = 1 +defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3768,20 +3759,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : 
t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index aa10af7..e9d5720 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -206,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, Requires<[HasVFP2]>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -286,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; // These are encoded as unary instructions. -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", @@ -315,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// // FP Unary Operations. @@ -335,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", @@ -376,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), @@ -810,7 +818,29 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: -def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, +// Single Precision register +class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{0}; + let Inst{15-12} = dst{4-1}; +} + +// Double Precision register +class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{4}; + let Inst{15-12} = dst{3-0}; +} + +def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -818,7 +848,7 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, let D = VFPNeonA8Domain; } -def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, +def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -826,7 +856,7 @@ def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, let D = VFPNeonA8Domain; } -def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, +def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -834,7 +864,7 @@ def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, let D = VFPNeonA8Domain; } -def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, +def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -842,25 +872,25 @@ def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, let D = VFPNeonA8Domain; } -def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, +def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; -def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, +def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; -def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, +def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; -def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, +def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, 
fbits32:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; // Fixed-Point to FP: -def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, +def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -868,7 +898,7 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, let D = VFPNeonA8Domain; } -def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, +def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -876,7 +906,7 @@ def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, let D = VFPNeonA8Domain; } -def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, +def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -884,7 +914,7 @@ def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, let D = VFPNeonA8Domain; } -def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, +def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -892,19 +922,19 @@ def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, let D = VFPNeonA8Domain; } -def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, +def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; -def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, +def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; -def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, +def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; -def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, +def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; @@ -1166,9 +1196,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags // to APSR. 
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in +let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), - "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>; + "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>; // Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR] in @@ -1182,6 +1212,10 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, fpexc", []>; def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, fpsid", []>; + def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr0", []>; + def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr1", []>; } //===----------------------------------------------------------------------===// @@ -1304,6 +1338,13 @@ def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">; def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">; def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">; def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">; +def : VFP2MnemonicAlias<"fmrx", "vmrs">; +def : VFP2MnemonicAlias<"fmxr", "vmsr">; + +// Be friendly and accept the old form of zero-compare +def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; + def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index afbe0e4..753e578 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -62,7 +62,7 @@ extern "C" { // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. // pass it as the argument to the C part of the callback @@ -86,7 +86,7 @@ extern "C" { // #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. 
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // // We need to exchange the values in slots 0 and 1 so we can diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 1f83762..6f3819a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" using namespace llvm; void ARMRegisterInfo::anchor() { } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 65ed95d..8a24842 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -15,13 +15,12 @@ #define ARMREGISTERINFO_H #include "ARM.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "ARMBaseRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b16a12c..1327fb8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -153,14 +153,21 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def ITSTATE : ARMReg<4, "itstate">; +// We model fpscr with two registers: FPSCR models the control bits and will be +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { + let Aliases = [FPSCR]; +} +def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. def FPSID : ARMReg<0, "fpsid">; +def MVFR1 : ARMReg<6, "mvfr1">; +def MVFR0 : ARMReg<7, "mvfr0">; def FPEXC : ARMReg<8, "fpexc">; // Register classes. @@ -304,7 +311,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], // Register class representing a pair of consecutive D registers. // Use the Q registers for the even-odd pairs. -def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> { +def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (interleave QPR, TuplesOE2D)> { // Allocate starting at non-VFP2 registers D16-D31 first. 
let AltOrders = [(rotl DPair, 16)];
let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2045482..911eb13 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1101,13 +1101,8 @@ public:
     return VectorList.Count == 4;
   }
-  bool isVecListTwoQ() const {
-    if (!isDoubleSpacedVectorList()) return false;
-    return VectorList.Count == 2;
-  }
-
   bool isVecListDPairSpaced() const {
-    if (!isSingleSpacedVectorList()) return false;
+    if (isSingleSpacedVectorList()) return false;
     return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
               .contains(VectorList.RegNum));
   }
@@ -1133,12 +1128,13 @@ public:
     return VectorList.Count == 1;
   }
-  bool isVecListTwoDAllLanes() const {
+  bool isVecListDPairAllLanes() const {
     if (!isSingleSpacedVectorAllLanes()) return false;
-    return VectorList.Count == 2;
+    return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+              .contains(VectorList.RegNum));
   }
-  bool isVecListTwoQAllLanes() const {
+  bool isVecListDPairSpacedAllLanes() const {
     if (!isDoubleSpacedVectorAllLanes()) return false;
     return VectorList.Count == 2;
   }
@@ -2858,8 +2854,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
       if (!RC->contains(Reg))
         return Error(RegLoc, "invalid register in register list");
       // List must be monotonically increasing.
-      if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
-        return Error(RegLoc, "register list not in ascending order");
+      if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+        if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+          Warning(RegLoc, "register list not in ascending order");
+        else
+          return Error(RegLoc, "register list not in ascending order");
+      }
       if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
         Warning(RegLoc, "duplicated register (" + RegTok.getString() +
                 ") in register list");
@@ -2905,6 +2905,12 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
     Parser.Lex(); // Eat the ']'.
     return MatchOperand_Success;
   }
+
+  // There's an optional '#' token here. Normally there wouldn't be, but
+  // inline assembly puts one in, and it's friendly to accept that.
+  if (Parser.getTok().is(AsmToken::Hash))
+    Parser.Lex(); // Eat the '#'
+
   const MCExpr *LaneIndex;
   SMLoc Loc = Parser.getTok().getLoc();
   if (getParser().ParseExpression(LaneIndex)) {
@@ -2981,12 +2987,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   case NoLanes:
     E = Parser.getTok().getLoc();
     Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
-                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
-
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
     Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
     break;
   case AllLanes:
     E = Parser.getTok().getLoc();
+    Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
     Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
                                                             S, E));
     break;
@@ -3152,7 +3159,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   switch (LaneKind) {
   case NoLanes:
-    // Non-lane two-register operands have been converted to the
+    // Two-register operands have been converted to the
     // composite register classes.
     if (Count == 2) {
       const MCRegisterClass *RC = (Spacing == 1) ?
@@ -3165,6 +3172,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { (Spacing == 2), S, E)); break; case AllLanes: + // Two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ? + &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, (Spacing == 2), S, E)); @@ -3253,7 +3268,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isMClass()) { // See ARMv6-M 10.1.1 - unsigned FlagsVal = StringSwitch<unsigned>(Mask) + std::string Name = Mask.lower(); + unsigned FlagsVal = StringSwitch<unsigned>(Name) .Case("apsr", 0) .Case("iapsr", 1) .Case("eapsr", 2) @@ -4427,10 +4443,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, else if (Res == -1) // irrecoverable error return true; // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { S = Parser.getTok().getLoc(); Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S)); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); return false; } @@ -4598,7 +4615,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || - Mnemonic == "fmuls" || Mnemonic == "fcmps" || + Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4101f59..ce4587b 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -849,7 +849,7 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -static const unsigned GPRDecoderTable[] = { +static const uint16_t GPRDecoderTable[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, ARM::R10, ARM::R11, @@ -869,8 +869,14 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + S = MCDisassembler::SoftFail; + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + + return S; } static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, @@ -916,7 +922,7 @@ static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned SPRDecoderTable[] = { +static const uint16_t SPRDecoderTable[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, ARM::S4, ARM::S5, ARM::S6, ARM::S7, ARM::S8, ARM::S9, ARM::S10, ARM::S11, @@ -937,7 +943,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned 
RegNo, return MCDisassembler::Success; } -static const unsigned DPRDecoderTable[] = { +static const uint16_t DPRDecoderTable[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10, ARM::D11, @@ -973,7 +979,7 @@ DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned QPRDecoderTable[] = { +static const uint16_t QPRDecoderTable[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, @@ -992,7 +998,7 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static const unsigned DPairDecoderTable[] = { +static const uint16_t DPairDecoderTable[] = { ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, @@ -1011,7 +1017,7 @@ static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static const unsigned DPairSpacedDecoderTable[] = { +static const uint16_t DPairSpacedDecoderTable[] = { ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5, ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9, ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13, @@ -2001,27 +2007,15 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // First output register switch (Inst.getOpcode()) { - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD1q8: - case ARM::VLD1q16wb_fixed: - case ARM::VLD1q16wb_register: - case ARM::VLD1q32wb_fixed: - case ARM::VLD1q32wb_register: - case ARM::VLD1q64wb_fixed: - case ARM::VLD1q64wb_register: - case ARM::VLD1q8wb_fixed: - case ARM::VLD1q8wb_register: - case ARM::VLD2d16: - case ARM::VLD2d32: - case ARM::VLD2d8: - case ARM::VLD2d16wb_fixed: - case ARM::VLD2d16wb_register: - case ARM::VLD2d32wb_fixed: - case ARM::VLD2d32wb_register: - case ARM::VLD2d8wb_fixed: - case ARM::VLD2d8wb_register: + case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register: + case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register: if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; break; @@ -2325,6 +2319,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2366,6 +2362,23 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16wb_fixed: case ARM::VST1q32wb_fixed: case ARM::VST1q64wb_fixed: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Qwb_fixed: + case ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST2d8wb_fixed: + case 
ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: break; } @@ -2525,8 +2538,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, align *= (1 << size); - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register: + case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2556,18 +2580,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) - return MCDisassembler::Fail; - if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + switch (Inst.getOpcode()) { + case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register: + case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2: + case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register: + case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register: + case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; + break; } + if (Rm != 0xF) + Inst.addOperand(MCOperand::CreateImm(0)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); @@ -2579,6 +2618,9 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + return S; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index bae4e78..2b994df 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1026,15 +1026,6 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, 
unsigned OpNum, - raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; -} - -void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1042,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; } -void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); @@ -1081,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; } void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, @@ -1111,23 +1101,13 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; } -void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; -} - void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. 
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+  O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
 }

 void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 1037161..e9cd407 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -134,9 +134,8 @@ public:
   void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O);
+  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+                                raw_ostream &O);
   void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@@ -147,8 +146,6 @@ public:
                                     raw_ostream &O);
   void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
-  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                raw_ostream &O);
   void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O);
   void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d3a3d3a..25849ee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -167,6 +167,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   case ARM::tBcc: return ARM::t2Bcc;
   case ARM::tLDRpciASM: return ARM::t2LDRpci;
   case ARM::tADR: return ARM::t2ADR;
+  case ARM::tB: return ARM::t2B;
   }
 }
@@ -181,6 +182,16 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                          const MCInstFragment *DF,
                                          const MCAsmLayout &Layout) const {
   switch ((unsigned)Fixup.getKind()) {
+  case ARM::fixup_arm_thumb_br: {
+    // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+    // low bit being an implied zero. There's an implied +4 offset for the
+    // branch, so we adjust the other way here to determine what's
+    // encodable.
+    //
+    // Relax if the value is too big for a (signed) i11.
+    int64_t Offset = int64_t(Value) - 4;
+    return Offset > 2046 || Offset < -2048;
+  }
   case ARM::fixup_arm_thumb_bcc: {
     // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
     // low bit being an implied zero. There's an implied +4 offset for the
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 06eb4e5..ae11be8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -187,21 +187,37 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
   case S31: case D31: return 31;
   // Composite registers use the regnum of the first register in the list.
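// Illustrative sketch, not from the patch: the case table that follows just
// encodes "a composite register numbers as its first D register".  The
// adjacent pairs starting on an even D register are the Q registers
// themselves (Qn covers D2n and D2n+1).  The helpers below are hypothetical
// and only model that numbering rule.
#include <cassert>

// A D-pair (adjacent or spaced) numbers as its first D register.
static unsigned dpairRegNumber(unsigned FirstD) { return FirstD; }

// First D register covered by Q<q>.
static unsigned firstDOfQ(unsigned Q) { return 2 * Q; }

int main() {
  assert(dpairRegNumber(1) == 1);             // D1_D2 and D1_D3
  assert(dpairRegNumber(6) == 6);             // D6_D8 (spaced pair)
  assert(dpairRegNumber(firstDOfQ(3)) == 6);  // Q3 = D6/D7
  return 0;
}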
- case D1_D2: return 1; - case D3_D5: return 3; - case D5_D7: return 5; - case D7_D9: return 7; - case D9_D10: return 9; - case D11_D12: return 11; - case D13_D14: return 13; - case D15_D16: return 15; - case D17_D18: return 17; - case D19_D20: return 19; - case D21_D22: return 21; - case D23_D24: return 23; - case D25_D26: return 25; - case D27_D28: return 27; - case D29_D30: return 29; + /* Q0 */ case D0_D2: return 0; + case D1_D2: case D1_D3: return 1; + /* Q1 */ case D2_D4: return 2; + case D3_D4: case D3_D5: return 3; + /* Q2 */ case D4_D6: return 4; + case D5_D6: case D5_D7: return 5; + /* Q3 */ case D6_D8: return 6; + case D7_D8: case D7_D9: return 7; + /* Q4 */ case D8_D10: return 8; + case D9_D10: case D9_D11: return 9; + /* Q5 */ case D10_D12: return 10; + case D11_D12: case D11_D13: return 11; + /* Q6 */ case D12_D14: return 12; + case D13_D14: case D13_D15: return 13; + /* Q7 */ case D14_D16: return 14; + case D15_D16: case D15_D17: return 15; + /* Q8 */ case D16_D18: return 16; + case D17_D18: case D17_D19: return 17; + /* Q9 */ case D18_D20: return 18; + case D19_D20: case D19_D21: return 19; + /* Q10 */ case D20_D22: return 20; + case D21_D22: case D21_D23: return 21; + /* Q11 */ case D22_D24: return 22; + case D23_D24: case D23_D25: return 23; + /* Q12 */ case D24_D26: return 24; + case D25_D26: case D25_D27: return 25; + /* Q13 */ case D26_D28: return 26; + case D27_D28: case D27_D29: return 27; + /* Q14 */ case D28_D30: return 28; + case D29_D30: case D29_D31: return 29; + /* Q15 */ } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 1606b92..ed27f9f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -151,13 +151,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, Triple TheTriple(TT); if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + return createMachOStreamer(Ctx, MAB, OS, Emitter, false); if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); } - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index faf73ac..9d3da14 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -34,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, unsigned Log2Size, uint64_t &FixedValue); - void RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); + void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue); public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, @@ -102,34 +102,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; + // For movw/movt r_type relocations they always have a pair following them and + // the r_length bits are used differently. 
The encoding of the r_length is as + // follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 1; + return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_HalfDifference); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 0; + return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: RelocType = unsigned(macho::RIT_ARM_Half); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + Log2Size = 2; return true; } } void ARMMachObjectWriter:: -RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { +RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Type = macho::RIT_ARM_Half; @@ -313,10 +326,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half || - RelocType == macho::RIT_ARM_HalfDifference) - return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, FixedValue); + if (RelocType == macho::RIT_ARM_Half) + return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, + Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); } @@ -391,6 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, (Log2Size << 25) | (IsExtern << 27) | (Type << 28)); + + // Even when it's not a scattered relocation, movw/movt always uses + // a PAIR relocation. + if (Type == macho::RIT_ARM_Half) { + // The other-half value only gets populated for the movt relocation. 
+    uint32_t Value = 0;
+    switch ((unsigned)Fixup.getKind()) {
+    default: break;
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      Value = FixedValue;
+      break;
+    }
+    macho::RelocationEntry MREPair;
+    MREPair.Word0 = Value;
+    MREPair.Word1 = ((0xffffff) |
+                     (Log2Size << 25) |
+                     (macho::RIT_Pair << 28));
+
+    Writer->addRelocation(Fragment->getParent(), MREPair);
+  }
+
   Writer->addRelocation(Fragment->getParent(), MRE);
 }
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a89a663..edd73c2 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 #include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index adaccdd..8cf7cac 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
 #include "Thumb1InstrInfo.h"
 #include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 4d97626..27fce9b 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -14,10 +14,10 @@
 #ifndef THUMB1INSTRUCTIONINFO_H
 #define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
 #include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
 #include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 namespace llvm {
   class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6b8bf0e..ef77bbd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -12,12 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59..6971842 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
 #define THUMB1REGISTERINFO_H
 #include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 namespace llvm {
   class ARMSubtarget;
   class ARMBaseInstrInfo;
-  class Type;
 struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 6cb182a..2fe4b85 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
 #include "ARM.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff
--git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index a754649..1ae2ef1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -14,10 +14,10 @@ #ifndef THUMB2INSTRUCTIONINFO_H #define THUMB2INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "ARM.h" #include "ARMInstrInfo.h" #include "Thumb2RegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 6d210fe..29a87d0 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "Thumb2RegisterInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "Thumb2InstrInfo.h" -#include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 824378a..6b397e8 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -16,13 +16,12 @@ #define THUMB2REGISTERINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { public: diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 5ee5f42..fb9d93b 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -39,9 +39,9 @@ namespace { /// ReduceTable - A static table with information on mapping from wide /// opcodes to narrow struct ReduceEntry { - unsigned WideOpc; // Wide opcode - unsigned NarrowOpc1; // Narrow opcode to transform to - unsigned NarrowOpc2; // Narrow opcode when it's two-address + uint16_t WideOpc; // Wide opcode + uint16_t NarrowOpc1; // Narrow opcode to transform to + uint16_t NarrowOpc2; // Narrow opcode when it's two-address uint8_t Imm1Limit; // Limit of immediate field (bits) uint8_t Imm2Limit; // Limit of immediate field when it's two-address unsigned LowRegs1 : 1; // Only possible if low-registers are used @@ -189,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs) + for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 80973b7..b6b209e 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2392,17 +2392,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); Out << ";\n"; - unsigned NumCases = SI.getNumCases(); // Skip the first item since that's the default case. 
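// The CBackend hunk below (and a matching CPPBackend.cpp hunk later in this
// diff) moves from index-based case access to SwitchInst's case iterators.
// A minimal sketch of that pattern against the 3.1-era headers; dumpCases is
// a hypothetical helper, not something introduced by the patch.
#include "llvm/Instructions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void dumpCases(SwitchInst &SI) {
  // case_begin()/case_end() cover only the explicit cases; the default
  // destination is reached separately via SI.getDefaultDest().
  for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
    ConstantInt *CaseVal = i.getCaseValue();
    BasicBlock *Succ = i.getCaseSuccessor();
    outs() << "case " << CaseVal->getSExtValue() << " -> "
           << Succ->getName() << "\n";
  }
}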
- for (unsigned i = 0; i < NumCases; ++i) { - ConstantInt* CaseVal = SI.getCaseValue(i); - BasicBlock* Succ = SI.getCaseSuccessor(i); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); + BasicBlock* Succ = i.getCaseSuccessor(); Out << " case "; writeOperand(CaseVal); Out << ":\n"; printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); - if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) + if (Function::iterator(Succ) == + llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp index 916f9ba..fac806e 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "SPU.h" #include "SPUFrameLowering.h" +#include "SPU.h" #include "SPUInstrBuilder.h" #include "SPUInstrInfo.h" #include "llvm/Function.h" diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 3d2b32d..55b3f72 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -31,14 +31,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <map> using namespace llvm; -// Used in getTargetNodeName() below namespace { - std::map<unsigned, const char *> node_names; - // Byte offset of the preferred slot (counted from the MSB) int prefslotOffset(EVT VT) { int retval=0; @@ -481,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setSchedulingPreference(Sched::RegPressure); } -const char * -SPUTargetLowering::getTargetNodeName(unsigned Opcode) const -{ - if (node_names.empty()) { - node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; - node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; - node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; - node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; - node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; - node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; - node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; - node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; - node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; - node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; - node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; - node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; - node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS"; - node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES"; - node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; - node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = - "SPUISD::ROTBYTES_LEFT_BITS"; - node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; - node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; - node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; - node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; - } - - std::map<unsigned, const char 
*>::iterator i = node_names.find(Opcode); - - return ((i != node_names.end()) ? i->second : 0); +const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; + case SPUISD::Hi: return "SPUISD::Hi"; + case SPUISD::Lo: return "SPUISD::Lo"; + case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; + case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; + case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; + case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; + case SPUISD::CALL: return "SPUISD::CALL"; + case SPUISD::SHUFB: return "SPUISD::SHUFB"; + case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; + case SPUISD::CNTB: return "SPUISD::CNTB"; + case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; + case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; + case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; + case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; + case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; + case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; + case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; + case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; + case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; + case SPUISD::SELB: return "SPUISD::SELB"; + case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER"; + case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; + case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; + } } //===----------------------------------------------------------------------===// @@ -1216,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { // FIXME: we should be able to query the argument registers from // tablegen generated code. - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, @@ -1230,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 }; // size of ArgRegs array - unsigned NumArgRegs = 77; + const unsigned NumArgRegs = 77; // We will spill (79-3)+1 registers to the stack SmallVector<SDValue, 79-3+1> MemOps; diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index e28e2a4..25c5355 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -15,9 +15,9 @@ #ifndef SPU_ISELLOWERING_H #define SPU_ISELLOWERING_H +#include "SPU.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "SPU.h" namespace llvm { namespace SPUISD { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index f0d21ad..85e5821 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -15,8 +15,8 @@ #define SPU_INSTRUCTIONINFO_H #include "SPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "SPURegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "SPUGenInstrInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 92983e1..1b2da5f 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// 
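// A recurring change in this diff (the ARM decoder tables, the SPU/Hexagon/
// MBlaze ArgRegs arrays, Thumb2SizeReduction's ReduceEntry) narrows static
// register/opcode tables from 'unsigned' to 'uint16_t'.  Register and opcode
// numbers fit comfortably in 16 bits, so the contents stay the same while the
// tables shrink (typically by half).  Sketch with made-up register numbers,
// not the real SPU enum values:
#include <cassert>
#include <cstdint>

static const uint16_t ArgRegsNarrow[] = { 3, 4, 5, 6, 7, 8 };
static const unsigned ArgRegsWide[]   = { 3, 4, 5, 6, 7, 8 };

int main() {
  // Same entry count either way; the narrow table is never larger.
  assert(sizeof(ArgRegsNarrow) / sizeof(ArgRegsNarrow[0]) ==
         sizeof(ArgRegsWide) / sizeof(ArgRegsWide[0]));
  assert(sizeof(ArgRegsNarrow) <= sizeof(ArgRegsWide));
  return 0;
}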
#define DEBUG_TYPE "reginfo" -#include "SPU.h" #include "SPURegisterInfo.h" +#include "SPU.h" #include "SPUInstrBuilder.h" #include "SPUSubtarget.h" #include "SPUMachineFunction.h" diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index e43f5ad..21f6b25 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "SPU.h" #include "SPUTargetMachine.h" +#include "SPU.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Support/DynamicLibrary.h" diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index c179292..3e5d38c 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -23,9 +23,6 @@ #include "llvm/Target/TargetData.h" namespace llvm { -class PassManager; -class GlobalValue; -class TargetFrameLowering; /// SPUTargetMachine /// diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 76b5e9c..107c6cc 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1090,10 +1090,10 @@ void CppWriter::printInstruction(const Instruction *I, << getOpName(SI->getDefaultDest()) << ", " << SI->getNumCases() << ", " << bbname << ");"; nl(Out); - unsigned NumCases = SI->getNumCases(); - for (unsigned i = 0; i < NumCases; ++i) { - const ConstantInt* CaseVal = SI->getCaseValue(i); - const BasicBlock *BB = SI->getCaseSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + const ConstantInt* CaseVal = i.getCaseValue(); + const BasicBlock *BB = i.getCaseSuccessor(); Out << iName << "->addCase(" << getOpName(CaseVal) << ", " << getOpName(BB) << ");"; diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index bbefcaf..270c7a7 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -15,7 +15,6 @@ #ifndef TARGET_Hexagon_H #define TARGET_Hexagon_H -#include <cassert> #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 688b8e3..bf333b7 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -32,11 +32,11 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -46,8 +46,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index 71787de..46c20e9 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "HexagonCallingConvLower.h" +#include "Hexagon.h" 
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "Hexagon.h" using namespace llvm; Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 49c6cdf..e8a6924 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -8,13 +8,13 @@ // //===----------------------------------------------------------------------===// +#include "HexagonFrameLowering.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" -#include "HexagonFrameLowering.h" #include "llvm/Function.h" #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 04ea4ed..57772a5 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -27,6 +27,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hwloops" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -43,8 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> -#include "Hexagon.h" -#include "HexagonTargetMachine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ed4b840..d6da0d0 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -28,17 +28,16 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "HexagonMachineFunctionInfo.h" #include "llvm/Support/CommandLine.h" +using namespace llvm; const unsigned Hexagon_MAX_RET_SIZE = 64; -using namespace llvm; static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, @@ -159,7 +158,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const unsigned RegList[] = { + static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 }; @@ -182,10 +181,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, return false; } - static const unsigned RegList1[] = { + static const uint16_t RegList1[] = { Hexagon::D1, Hexagon::D2 }; - static const unsigned RegList2[] = { + static const uint16_t RegList2[] = { Hexagon::R1, Hexagon::R3 }; if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 5396486..4208bcb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ 
-15,10 +15,10 @@ #ifndef Hexagon_ISELLOWERING_H #define Hexagon_ISELLOWERING_H +#include "Hexagon.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "Hexagon.h" namespace llvm { namespace HexagonISD { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 07872d4..3d7ace5 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonRegisterInfo.h" #include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "Hexagon.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index eb088c3..7306870 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -14,10 +14,10 @@ #ifndef HexagonINSTRUCTIONINFO_H #define HexagonINSTRUCTIONINFO_H +#include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" -#include "HexagonRegisterInfo.h" #define GET_INSTRINFO_HEADER diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 06c732f..55cbc09 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -36,6 +36,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagon-peephole" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -45,16 +47,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> -#include "Hexagon.h" -#include "HexagonTargetMachine.h" - -#include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index c481270..2a9de92 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonRegisterInfo.h" +#include "Hexagon.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index fc65305..6cf727b 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -16,9 +16,10 @@ #define HexagonREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" + #define GET_REGINFO_HEADER #include "HexagonGenRegisterInfo.inc" -#include "llvm/MC/MachineLocation.h" // // We try not to hard code the reserved registers in our code, diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 319eab2..b9e6894 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ 
b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -50,7 +50,7 @@ extern "C" void LLVMInitializeHexagonTarget() { /// HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - TargetOptions Options, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 70bea56..0336965 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -14,13 +14,13 @@ #ifndef HexagonTARGETMACHINE_H #define HexagonTARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "HexagonISelLowering.h" #include "HexagonSelectionDAGInfo.h" #include "HexagonFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" namespace llvm { @@ -37,8 +37,9 @@ class HexagonTargetMachine : public LLVMTargetMachine { public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, - StringRef FS, TargetOptions Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index d3ce5a6..32cc709 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Target/TargetData.h" @@ -18,9 +21,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/Support/ELF.h" #include "llvm/Support/CommandLine.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetObjectFile.h" -#include "HexagonTargetMachine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 74abc56..3cfa4fd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===// +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file provides Cell Hexagon specific target descriptions. +// This file provides Hexagon specific target descriptions. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 364841f..b18d23a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- SPUMCTargetDesc.h - Hexagon Target Descriptions ---------*- C++ -*-===// +//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SPUMCTARGETDESC_H -#define SPUMCTARGETDESC_H +#ifndef HEXAGONMCTARGETDESC_H +#define HEXAGONMCTARGETDESC_H namespace llvm { class MCSubtargetInfo; diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt index 1114d99..73c7e01 100644 --- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile index 67be2bc..885be2d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/Makefile +++ b/lib/Target/Hexagon/MCTargetDesc/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===## +##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index adedf93..6b958c8 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -34,9 +34,9 @@ extern const MCInstrDesc MBlazeInsts[]; using namespace llvm; -const unsigned UNSUPPORTED = -1; +const uint16_t UNSUPPORTED = -1; -static const unsigned mblazeBinary2Opcode[] = { +static const uint16_t mblazeBinary2Opcode[] = { MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03 MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07 MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h index 8be15bf..01e6578 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.h +++ b/lib/Target/MBlaze/MBlazeFrameLowering.h @@ -15,11 +15,10 @@ #define MBLAZE_FRAMEINFO_H #include "MBlaze.h" -#include "MBlazeSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MBlazeSubtarget; +class MBlazeSubtarget; class MBlazeFrameLowering : public TargetFrameLowering { protected: diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 23c8e13..9ef6bb6 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -657,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { MBlaze::R5, MBlaze::R6, MBlaze::R7, MBlaze::R8, MBlaze::R9, MBlaze::R10 }; diff --git 
a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 168694b..6a79fc1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -15,11 +15,11 @@ #ifndef MBlazeISELLOWERING_H #define MBlazeISELLOWERING_H +#include "MBlaze.h" +#include "MBlazeSubtarget.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" -#include "MBlaze.h" -#include "MBlazeSubtarget.h" namespace llvm { namespace MBlazeCC { diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index a309d2b..5252147 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -15,9 +15,9 @@ #define MBLAZEINSTRUCTIONINFO_H #include "MBlaze.h" +#include "MBlazeRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "MBlazeRegisterInfo.h" #define GET_INSTRINFO_HEADER #include "MBlazeGenInstrInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h index bb77ed4..7b97744 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.h +++ b/lib/Target/MBlaze/MBlazeMCInstLower.h @@ -14,7 +14,6 @@ namespace llvm { class AsmPrinter; - class MCAsmInfo; class MCContext; class MCInst; class MCOperand; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 6801a1a..46f5207 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "mblaze-frame-info" +#include "MBlazeRegisterInfo.h" #include "MBlaze.h" #include "MBlazeSubtarget.h" -#include "MBlazeRegisterInfo.h" #include "MBlazeMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Type.h" diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 5c07424..dd7de9b 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" #include "MBlazeTargetMachine.h" +#include "MBlaze.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index fd5de34..c03ba47 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "MSP430InstrInfo.h" +#include "MSP430.h" #include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" #include "llvm/Function.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index fe2a75c..04f339b 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -14,8 +14,8 @@ #ifndef LLVM_TARGET_MSP430INSTRINFO_H #define LLVM_TARGET_MSP430INSTRINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "MSP430RegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "MSP430GenInstrInfo.inc" diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h index 297efd2..24151e2 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.h +++ b/lib/Target/MSP430/MSP430MCInstLower.h @@ -14,7 +14,6 @@ namespace llvm { class AsmPrinter; - 
class MCAsmInfo; class MCContext; class MCInst; class MCOperand; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index f9ddfb3..51ec71a 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -13,9 +13,9 @@ #define DEBUG_TYPE "msp430-reg-info" +#include "MSP430RegisterInfo.h" #include "MSP430.h" #include "MSP430MachineFunctionInfo.h" -#include "MSP430RegisterInfo.h" #include "MSP430TargetMachine.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index e7bebbd..4d8792e 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -15,12 +15,11 @@ #define LLVM_TARGET_MSP430_SUBTARGET_H #include "llvm/Target/TargetSubtargetInfo.h" +#include <string> #define GET_SUBTARGETINFO_HEADER #include "MSP430GenSubtargetInfo.inc" -#include <string> - namespace llvm { class StringRef; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index af62e48..9f2eda1 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "MSP430TargetMachine.h" +#include "MSP430.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index d69570b..9d5a2f1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -92,25 +92,42 @@ public: if (!Value) return; // Doesn't change encoding. + // Where do we start in the object unsigned Offset = Fixup.getOffset(); - // FIXME: The below code will not work across endian models - // How many bytes/bits are we fixing up? - unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1; - uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1; + // Number of bytes we need to fixup + unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + // Used to point to big endian bytes + unsigned FullSize; + + switch ((unsigned)Kind) { + case Mips::fixup_Mips_16: + FullSize = 2; + break; + case Mips::fixup_Mips_64: + FullSize = 8; + break; + default: + FullSize = 4; + break; + } // Grab current value, if any, from bits. uint64_t CurVal = 0; - for (unsigned i = 0; i != NumBytes; ++i) - CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8); + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = IsLittle ? i : (FullSize - 1 - i); + CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); + } + + uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); - // Write out the bytes back to the code/data bits. - // First the unaffected bits and then the fixup. + // Write out the fixed up bytes back to the code/data bits. for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff); + unsigned Idx = IsLittle ? 
i : (FullSize - 1 - i); + Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff); } -} + } unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index b039678..9ebb6d2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -49,9 +49,9 @@ public: void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const { // Output the instruction encoding in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); } } diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index bacecf2..bafadc8 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -21,8 +21,6 @@ namespace llvm { class MipsTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class formatted_raw_ostream; FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp index 31b669a..dc8fbd0 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -26,28 +26,28 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) { Iter->push_back(I); } -void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs); - AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff)); + GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL)); } -void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs); - AddInstr(SeqLs, Inst(ORi, Imm & 0xffff)); + GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL)); } -void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { unsigned Shamt = CountTrailingZeros_64(Imm); GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs); AddInstr(SeqLs, Inst(SLL, Shamt)); } -void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size)); + uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size)); // Do nothing if Imm is 0. 
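The MipsAsmBackend::ApplyFixup hunk above (and the matching MipsMCCodeEmitter::EmitInstruction change) reworks byte patching so the same code handles big- and little-endian output; the old loop assumed little-endian byte order. A stand-alone sketch of the idea, where the FullSize/TargetSize parameters stand in for what the backend derives from the fixup kind:

    #include <cassert>
    #include <cstdint>

    // Patch a TargetSize-bit fixup value into a FullSize-byte instruction
    // word stored at Data[Offset], honouring the target byte order.
    static void applyFixup(uint8_t *Data, unsigned Offset, unsigned FullSize,
                           unsigned TargetSize, uint64_t Value, bool IsLittle) {
      assert(TargetSize >= 1 && TargetSize <= 64);
      unsigned NumBytes = (TargetSize + 7) / 8;
      assert(NumBytes <= FullSize);

      // Read the current bits, least-significant byte first.
      uint64_t CurVal = 0;
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
        CurVal |= (uint64_t)Data[Offset + Idx] << (i * 8);
      }

      // Add the fixup value under a TargetSize-wide mask, keep the rest.
      uint64_t Mask = ~(uint64_t)0 >> (64 - TargetSize);
      CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);

      // Write the bytes back in the same order they were read.
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
        Data[Offset + Idx] = (uint8_t)(CurVal >> (i * 8));
      }
    }

    // Example: patch a 16-bit immediate into a big-endian "lui $a0, 0":
    //   uint8_t Buf[4] = {0x3c, 0x04, 0x00, 0x00};
    //   applyFixup(Buf, 0, 4, 16, 0x1234, /*IsLittle=*/false);
    //   Buf is now {0x3c, 0x04, 0x12, 0x34}.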
if (!MaskedImm) @@ -122,7 +122,7 @@ void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) { } const MipsAnalyzeImmediate::InstSeq -&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size, +&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu) { this->Size = Size; diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h index 24e6e5f..a094dda 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.h +++ b/lib/Target/Mips/MipsAnalyzeImmediate.h @@ -25,7 +25,7 @@ namespace llvm { /// Analyze - Get an instrucion sequence to load immediate Imm. The last /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is /// true; - const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu); + const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu); private: typedef SmallVector<InstSeq, 5> InstSeqLs; @@ -34,18 +34,18 @@ namespace llvm { /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to /// load immediate Imm - void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to /// load immediate Imm - void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to /// load immediate Imm - void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLs - Get instrucion sequences to load immediate Imm. - void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi. 
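The MipsAnalyzeImmediate hunks above switch the immediate from int64_t to uint64_t and use explicit 64-bit masks, so that masking and rounding never sign-extend. A stand-alone illustration of the rounding trick for the simple 32-bit LUi+ADDiu case (helper names are made up):

    #include <cstdint>
    #include <cstdio>

    // ADDiu sign-extends its 16-bit operand, so the high half must be
    // rounded up whenever the low half will be negative.
    static void splitForADDiu(uint64_t Imm, uint64_t &Hi, uint64_t &Lo) {
      Hi = ((Imm + 0x8000ULL) >> 16) & 0xffffULL;
      Lo = Imm & 0xffffULL;
    }

    int main() {
      uint64_t Hi, Lo;
      splitForADDiu(0x12348765ULL, Hi, Lo);
      // Reconstruct: (Hi << 16) + sign-extended Lo == original immediate.
      int64_t SLo = (int16_t)Lo;
      std::printf("hi=0x%llx lo=0x%llx ok=%d\n",
                  (unsigned long long)Hi, (unsigned long long)Lo,
                  (uint32_t)((Hi << 16) + SLo) == 0x12348765u);
      return 0;
    }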
void ReplaceADDiuSLLWithLUi(InstSeq &Seq); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index aeabc0f..f2b842a 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" -#include "Mips.h" #include "MipsAsmPrinter.h" +#include "Mips.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" @@ -34,8 +34,6 @@ #include "llvm/Instructions.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 8502db2..473da7e 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -22,9 +22,9 @@ namespace llvm { class MCStreamer; class MachineInstr; -class raw_ostream; class MachineBasicBlock; class Module; +class raw_ostream; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index e83c64e..ebfbb4a 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MipsAnalyzeImmediate.h" #include "MipsFrameLowering.h" +#include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MCTargetDesc/MipsBaseInfo.h" diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 782d203..536879e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -99,6 +99,8 @@ private: return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } + void ProcessFunctionAfterISel(MachineFunction &MF); + bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); void InitGlobalBaseReg(MachineFunction &MF); virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -181,10 +183,57 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { } } +bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E; ++U) { + MachineOperand &MO = U.getOperand(); + MachineInstr *MI = MO.getParent(); + + // Do not replace if it is a phi's operand or is tied to def operand. 
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo())) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { + InitGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + ReplaceUsesWithZeroReg(MRI, *I); +} + bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - InitGlobalBaseReg(MF); + ProcessFunctionAfterISel(MF); return Ret; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index dc894d9..ecde5b6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -18,13 +18,13 @@ #include "MipsTargetMachine.h" #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" -#include "InstPrinter/MipsInstPrinter.h" -#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -130,22 +130,32 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + if (HasMips64) { + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + } setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i32, Expand); @@ -185,8 +195,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); 
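The MipsISelDAGToDAG hunk above adds a post-ISel cleanup: when a register is defined by "addiu $dst, $zero, 0" (or the 64-bit daddiu form), its uses can read $zero directly, except for PHI operands and operands tied to a def. A toy stand-alone model of that rewrite, with made-up data structures; the real pass walks MachineRegisterInfo use lists:

    #include <vector>

    struct ToyOperand { int Reg; bool IsTiedToDef; };
    struct ToyInst {
      enum Kind { ADDiu, PHI, Other } K;
      std::vector<ToyOperand> Ops;   // Ops[0] is the def, the rest are uses
      long Imm;
    };

    static const int ZERO = 0;       // stands in for Mips::ZERO

    static void forwardZeroCopies(std::vector<ToyInst> &Body) {
      for (const ToyInst &Def : Body) {
        // Look for "ADDiu dst, ZERO, 0", i.e. a copy of the zero register.
        if (Def.K != ToyInst::ADDiu || Def.Imm != 0 ||
            Def.Ops.size() < 2 || Def.Ops[1].Reg != ZERO)
          continue;
        int Dst = Def.Ops[0].Reg;
        for (ToyInst &Use : Body) {
          // Skip the defining instruction, PHIs, and tied operands,
          // mirroring the checks in the hunk above.
          if (&Use == &Def || Use.K == ToyInst::PHI)
            continue;
          for (ToyOperand &MO : Use.Ops)
            if (MO.Reg == Dst && !MO.IsTiedToDef)
              MO.Reg = ZERO;
        }
      }
    }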
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); @@ -214,9 +222,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -246,11 +251,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); - setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); - setMinFunctionAlignment(2); + setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); computeRegisterProperties(); @@ -559,21 +564,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0)); + SDValue SetCC = N->getOperand(0); - if (Cond.getOpcode() != MipsISD::FPCmp) + if ((SetCC.getOpcode() != ISD::SETCC) || + !SetCC.getOperand(0).getValueType().isInteger()) return SDValue(); - SDValue True = DAG.getConstant(1, MVT::i32); - SDValue False = DAG.getConstant(0, MVT::i32); + SDValue False = N->getOperand(2); + EVT FalseTy = False.getValueType(); - return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); + if (!FalseTy.isInteger()) + return SDValue(); + + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False); + + if (!CN || CN->getZExtValue()) + return SDValue(); + + const DebugLoc DL = N->getDebugLoc(); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + SDValue True = N->getOperand(1); + + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); + + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, @@ -684,8 +705,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) case ISD::SDIVREM: case ISD::UDIVREM: return PerformDivRemCombine(N, DAG, DCI, Subtarget); - case ISD::SETCC: - return PerformSETCCCombine(N, DAG, DCI, Subtarget); + case ISD::SELECT: + return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: @@ -708,6 +729,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return 
LowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -1475,6 +1497,18 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const Op.getDebugLoc()); } +SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = CreateFPCmp(DAG, Op); + + assert(Cond.getOpcode() == MipsISD::FPCmp && + "Floating point operand expected."); + + SDValue True = DAG.getConstant(1, MVT::i32); + SDValue False = DAG.getConstant(0, MVT::i32); + + return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc()); +} + SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here @@ -1841,13 +1875,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, static const unsigned IntRegsSize=4, FloatRegsSize=2; - static const unsigned IntRegs[] = { + static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; - static const unsigned F32Regs[] = { + static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 }; - static const unsigned F64Regs[] = { + static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 }; @@ -1926,10 +1960,10 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, return false; // CC must always match } -static const unsigned Mips64IntRegs[8] = +static const uint16_t Mips64IntRegs[8] = {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; -static const unsigned Mips64DPRegs[8] = +static const uint16_t Mips64DPRegs[8] = {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; @@ -1996,7 +2030,7 @@ AnalyzeMips64CallOperands(CCState &CCInfo, static const unsigned O32IntRegsSize = 4; -static const unsigned O32IntRegs[] = { +static const uint16_t O32IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -2115,9 +2149,9 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, if (!IsRegLoc) LocMemOffset = VA.getLocMemOffset(); else { - const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, + const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); - const unsigned *RegEnd = Mips64IntRegs + 8; + const uint16_t *RegEnd = Mips64IntRegs + 8; // Copy double words to registers. for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { @@ -2540,7 +2574,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, EVT PtrTy) { - const unsigned *Reg = Mips64IntRegs + 8; + const uint16_t *Reg = Mips64IntRegs + 8; int FOOffset; // Frame object offset from virtual frame pointer. if (IsRegLoc) { @@ -2709,7 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { unsigned NumOfRegs = IsO32 ? 4 : 8; - const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; + const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. 
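Several hunks above (and many more throughout this diff) change static register tables from unsigned to uint16_t: physical-register numbers fit comfortably in 16 bits, so the tables shrink by half and match the 16-bit register-list type used elsewhere. A trivial stand-alone comparison with illustrative values, not real register encodings:

    #include <cstdint>
    #include <cstdio>

    static const uint16_t ArgRegs[]     = { 4, 5, 6, 7 };   // e.g. $a0-$a3
    static const unsigned ArgRegsWide[] = { 4, 5, 6, 7 };

    int main() {
      std::printf("uint16_t table: %zu bytes, unsigned table: %zu bytes\n",
                  sizeof(ArgRegs), sizeof(ArgRegsWide));
      return 0;
    }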
const TargetRegisterClass *RC diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 621bbec..66f45cd 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -15,10 +15,10 @@ #ifndef MipsISELLOWERING_H #define MipsISELLOWERING_H -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" #include "Mips.h" #include "MipsSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace MipsISD { @@ -128,6 +128,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 10caf30..4be727d 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,9 +15,9 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "MipsRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "MipsRegisterInfo.h" #define GET_INSTRINFO_HEADER #include "MipsGenInstrInfo.inc" diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index be65298..0d51298 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" -#include "MipsMCInstLower.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index cbd5264..20bb338 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -14,11 +14,9 @@ #include "llvm/Support/Compiler.h" namespace llvm { - class MCAsmInfo; class MCContext; class MCInst; class MCOperand; - class MCSymbol; class MachineInstr; class MachineFunction; class Mangler; @@ -38,7 +36,7 @@ public: void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, - SmallVector<MCInst, 4>& MCInsts); + SmallVector<MCInst, 4>& MCInsts); void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 57ff069..abb5404 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,10 +14,10 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H -#include <utility> #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include <utility> namespace llvm { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index e0ecba2..5cfda34 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -13,10 +13,10 
@@ #define DEBUG_TYPE "mips-reg-info" +#include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsSubtarget.h" -#include "MipsRegisterInfo.h" #include "MipsMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Type.h" @@ -83,12 +83,12 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - static const unsigned ReservedCPURegs[] = { + static const uint16_t ReservedCPURegs[] = { Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, Mips::SP, Mips::FP, Mips::RA }; - static const unsigned ReservedCPU64Regs[] = { + static const uint16_t ReservedCPU64Regs[] = { Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 8806aaf..ad02231 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MipsTargetMachine.h" +#include "Mips.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 19ae142..80c00e8 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -14,15 +14,15 @@ #ifndef MIPSTARGETMACHINE_H #define MIPSTARGETMACHINE_H -#include "MipsSubtarget.h" +#include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" -#include "MipsFrameLowering.h" +#include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" +#include "MipsSubtarget.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" -#include "MipsJITInfo.h" namespace llvm { class formatted_raw_ostream; diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h index 77a298d..a3e0f32 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -17,9 +17,9 @@ #ifndef PTXBASEINFO_H #define PTXBASEINFO_H +#include "PTXMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "PTXMCTargetDesc.h" namespace llvm { namespace PTXStateSpace { diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index 7d46cce..ffb92cb 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -1,4 +1,3 @@ -//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 58ac5f2..0b6ac7b 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "ptx-asm-printer" -#include "PTX.h" #include "PTXAsmPrinter.h" +#include "PTX.h" #include "PTXMachineFunctionInfo.h" #include "PTXParamManager.h" #include "PTXRegisterInfo.h" diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e5d4edc..db1c953 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXISelLowering.h" +#include "PTX.h" #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" 
#include "PTXSubtarget.h" diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index fd20982..33220f4 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -18,8 +18,6 @@ #include "llvm/Target/TargetLowering.h" namespace llvm { -class PTXSubtarget; -class PTXTargetMachine; namespace PTXISD { enum NodeType { diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 9d6cbf1..443cd54 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "ptx-instrinfo" -#include "PTX.h" #include "PTXInstrInfo.h" +#include "PTX.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp index 74538e6..cc1cc71 100644 --- a/lib/Target/PTX/PTXParamManager.cpp +++ b/lib/Target/PTX/PTXParamManager.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXParamManager.h" +#include "PTX.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h index 32342f7..92e7728 100644 --- a/lib/Target/PTX/PTXParamManager.h +++ b/lib/Target/PTX/PTXParamManager.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include <string> namespace llvm { diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index 3f087cd..b6ffd38 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXRegisterInfo.h" +#include "PTX.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 9305377..40835d0 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXTargetMachine.h" +#include "PTX.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" @@ -26,6 +26,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -37,8 +38,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 02dad45..9c6eefe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" 
#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 5dc1863..24a7178 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,14 +25,11 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; - class formatted_raw_ostream; class JITCodeEmitter; - class Target; class MachineInstr; class AsmPrinter; class MCInst; - class TargetMachine; - + FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 591ae02..4abb469 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "PPC.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" @@ -53,7 +54,6 @@ #include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" -#include "InstPrinter/PPCInstPrinter.h" using namespace llvm; namespace { diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 8efc9c1..9883c2e 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[ CCCustom<"CC_PPC_SVR4_Custom_Dummy"> ]>; +def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 6d612f7..b77a80b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -38,7 +38,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. 
/// -static const unsigned short VRRegNo[] = { +static const uint16_t VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 95d0d64..d80a385 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -14,10 +14,10 @@ #ifndef PPCHAZRECS_H #define PPCHAZRECS_H +#include "PPCInstrInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "PPCInstrInfo.h" namespace llvm { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bfed7ba..85b5bc1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,11 @@ #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,16 +29,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, @@ -1547,7 +1547,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -1574,7 +1574,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1598,8 +1598,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const unsigned *GetFPR() { - static const unsigned FPR[] = { +static const uint16_t *GetFPR() { + static const uint16_t FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -1780,13 +1780,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
if (isVarArg) { - static const unsigned GPArgRegs[] = { + static const uint16_t GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const unsigned FPArgRegs[] = { + static const uint16_t FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1879,18 +1879,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -1901,7 +1901,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -2769,6 +2769,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -3141,17 +3147,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -3159,7 +3165,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? 
GPR_64 : GPR_32; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3534e9c..2e046c4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -15,10 +15,10 @@ #ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "PPC.h" #include "PPCSubtarget.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { namespace PPCISD { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 02bffed..78f3596 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -64,13 +64,7 @@ let Defs = [LR8] in PPC970_Unit_BRU; // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_Darwin : IForm<18, 0, 1, @@ -90,13 +84,7 @@ let isCall = 1, PPC970_Unit = 7, // ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e5f171d..7d49aa1 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -15,8 +15,8 @@ #define POWERPC_INSTRUCTIONINFO_H #include "PPC.h" -#include "llvm/Target/TargetInstrInfo.h" #include "PPCRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e234012..939b71a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -438,13 +438,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -463,13 +457,7 @@ let isCall = 1, PPC970_Unit = 7, } // SVR4 ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... 
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 306cc1f..2976f01 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "reginfo" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCRegisterInfo.h" #include "PPCFrameLowering.h" #include "PPCSubtarget.h" #include "llvm/CallingConv.h" @@ -100,104 +100,20 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { const uint16_t* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. - static const uint16_t Darwin32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::LR, 0 - }; - - // 32-bit SVR4 calling convention. - static const uint16_t SVR4_CalleeSavedRegs[] = { - PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, - - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - 0 - }; - // 64-bit Darwin calling convention. - static const uint16_t Darwin64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::LR8, 0 - }; - - // 64-bit SVR4 calling convention. 
- static const uint16_t SVR4_64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : + CSR_Darwin32_SaveList; - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; +} - 0 - }; - +const unsigned* +PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : - Darwin32_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_Darwin64_RegMask : + CSR_Darwin32_RegMask; - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 6ce90bc..b1e6a72 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -42,6 +42,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const unsigned *getCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index da20274..ba9c779 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "PPCTargetMachine.h" +#include "PPC.h" #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 6dd11c9..7da2b0c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -24,8 +24,6 @@ #include "llvm/Target/TargetData.h" namespace llvm { -class PassManager; -class GlobalValue; /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. /// diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp index 1423b1e..9a729bd 100644 --- a/lib/Target/Sparc/FPMover.cpp +++ b/lib/Target/Sparc/FPMover.cpp @@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) { /// registers that correspond to it. 
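The PPC hunks above drop the hand-written callee-saved arrays and explicit call-clobber Defs lists in favour of TableGen-generated CSR sets plus a call-preserved register mask that is attached to each call node. Such a mask is a packed bit vector with one bit per physical register; a set bit means the register is preserved across the call. A stand-alone sketch of how a mask like this is queried, with toy values rather than the real generated tables:

    #include <cstdint>

    static bool isPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1u;
    }

    // Example: a 64-register mask where only registers 14..31 are preserved.
    static const uint32_t ToyMask[2] = { 0xffffc000u, 0x00000000u };

    // isPreservedAcrossCall(ToyMask, 20) -> true
    // isPreservedAcrossCall(ToyMask, 3)  -> false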
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, unsigned &OddReg) { - static const unsigned EvenHalvesOfPairs[] = { + static const uint16_t EvenHalvesOfPairs[] = { SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14, SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30 }; - static const unsigned OddHalvesOfPairs[] = { + static const uint16_t OddHalvesOfPairs[] = { SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15, SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31 }; - static const unsigned DoubleRegsInOrder[] = { + static const uint16_t DoubleRegsInOrder[] = { SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8, SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15 }; - for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i) + for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i) if (DoubleRegsInOrder[i] == DoubleReg) { EvenReg = EvenHalvesOfPairs[i]; OddReg = OddHalvesOfPairs[i]; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a6b63fb..ee12633 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -50,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned RegList[] = { + static const uint16_t RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; //Try to get first reg @@ -301,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6); - const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; + const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; unsigned ArgOffset = CCInfo.getNextStackOffset(); if (NumAllocated == 6) ArgOffset += StackOffset; diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 4a7c479..f483c96 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -15,8 +15,8 @@ #ifndef SPARC_ISELLOWERING_H #define SPARC_ISELLOWERING_H -#include "llvm/Target/TargetLowering.h" #include "Sparc.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace SPISD { diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 4932531..204f698 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -14,8 +14,8 @@ #ifndef SPARCINSTRUCTIONINFO_H #define SPARCINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "SparcRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "SparcGenInstrInfo.inc" diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index c392fcc..6357468 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "SparcRegisterInfo.h" +#include "Sparc.h" #include "SparcSubtarget.h" +#include "llvm/Type.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 80a3be6..6f31356 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "SparcTargetMachine.h" +#include "Sparc.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d91830f..9e88472 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -67,11 +67,11 @@ private: MCStreamer &Out); /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) - /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. + /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); - /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode - /// or %es:(%edi) in 32bit mode. + /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) + /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); bool is64BitMode() const { @@ -468,7 +468,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; - return Op.isMem() && Op.Mem.SegReg == X86::ES && + return Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; @@ -838,6 +839,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; + SMLoc IndexLoc; if (getLexer().is(AsmToken::Percent)) { SMLoc StartLoc, EndLoc; @@ -851,6 +853,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. + IndexLoc = Parser.getTok().getLoc(); // Following the comma we should have either an index register, or a scale // value. We don't support the later form, but we want to parse it @@ -876,8 +879,10 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t ScaleVal; - if (getParser().ParseAbsoluteExpression(ScaleVal)) + if (getParser().ParseAbsoluteExpression(ScaleVal)){ + Error(Loc, "expected scale expression"); return 0; + } // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ @@ -910,6 +915,23 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. + // If we have both a base register and an index register make sure they are + // both 64-bit or 32-bit registers. 
+ if (BaseReg != 0 && IndexReg != 0) { + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && + !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && + IndexReg != X86::RIZ) { + Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); + return 0; + } + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && + !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && + IndexReg != X86::EIZ){ + Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); + return 0; + } + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b0e66f0..fbd81d2 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -312,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) { if (consumeByte(insn, &byte)) return -1; + + /* + * If the first byte is a LOCK prefix break and let it be disassembled + * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. + * FIXME there is currently no way to get the disassembler to print the + * lock prefix if it is not the first byte. + */ + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; switch (byte) { case 0xf0: /* LOCK */ diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 30a847f..f532019 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -29,7 +29,7 @@ using namespace llvm; void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, const char *(*getRegName)(unsigned)) { // If this is a shuffle operation, the switch should fill in this state. - SmallVector<unsigned, 8> ShuffleMask; + SmallVector<int, 8> ShuffleMask; const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; switch (MI->getOpcode()) { @@ -500,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, if (Src1Name == Src2Name) { for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { if ((int)ShuffleMask[i] >= 0 && // Not sentinel. - ShuffleMask[i] >= e) // From second mask. + ShuffleMask[i] >= (int)e) // From second mask. ShuffleMask[i] -= e; } } @@ -518,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // Otherwise, it must come from src1 or src2. Print the span of elements // that comes from this src. - bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); + bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size(); const char *SrcName = isSrc1 ? Src1Name : Src2Name; OS << (SrcName ? 
SrcName : "mem") << '['; bool IsFirst = true; while (i != e && (int)ShuffleMask[i] >= 0 && - (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { + (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) { if (!IsFirst) OS << ','; else diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 9ccbf1c..3f770f7 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 37727b6..80990e5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -46,6 +46,11 @@ public: return (STI.getFeatureBits() & X86::Mode64Bit) != 0; } + bool is32BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) == 0; + } + static unsigned GetX86RegNum(const MCOperand &MO) { return X86_MC::getX86RegNum(MO.getReg()); } @@ -154,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { return MCFixup::getKindForSize(Size, isPCRel); } -/// Is32BitMemOperand - Return true if the specified instruction with a memory -/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit -/// memory operand. Op specifies the operand # of the memoperand. +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. Op specifies the operand # of the memoperand. static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); @@ -169,6 +173,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { return false; } +/// Is64BitMemOperand - Return true if the specified instruction has +/// a 64-bit memory operand. Op specifies the operand # of the memoperand. +#ifndef NDEBUG +static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} +#endif + +/// Is16BitMemOperand - Return true if the specified instruction has +/// a 16-bit memory operand. Op specifies the operand # of the memoperand. 
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + /// StartsWithGlobalOffsetTable - Check if this expression starts with /// _GLOBAL_OFFSET_TABLE_ and if it is of the form /// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF @@ -817,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0xF3, CurByte, OS); // Emit the address size opcode prefix as needed. - if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) + bool need_address_override; + if (TSFlags & X86II::AdSize) { + need_address_override = true; + } else if (MemOperand == -1) { + need_address_override = false; + } else if (is64BitMode()) { + assert(!Is16BitMemOperand(MI, MemOperand)); + need_address_override = Is32BitMemOperand(MI, MemOperand); + } else if (is32BitMode()) { + assert(!Is64BitMemOperand(MI, MemOperand)); + need_address_override = Is16BitMemOperand(MI, MemOperand); + } else { + need_address_override = false; + } + + if (need_address_override) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index a581993..624e56f 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -922,3 +922,22 @@ _test2: ## @test2 The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// + +[UNSAFE FP] + +void foo(double, double, double); +void norm(double x, double y, double z) { + double scale = __builtin_sqrt(x*x + y*y + z*z); + foo(x/scale, y/scale, z/scale); +} + +We currently generate an sqrtsd and 3 divsd instructions. This is bad, fp div is +slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first +and emit 3 mulsd in place of the divs. This can be done as a target-independent +transform. + +If we're dealing with floats instead of doubles we could even replace the sqrtss +and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the +cost of reduced accuracy. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index f4b85ae..32c722a 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -20,7 +20,7 @@ namespace llvm { -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { // Defaults the copying the dest value. 
ShuffleMask.push_back(0); ShuffleMask.push_back(1); @@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { } // <3,1> or <6,7,2,3> -void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = NElts/2; i != NElts; ++i) ShuffleMask.push_back(NElts+i); @@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts, } // <0,2> or <0,1,4,5> -void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) ShuffleMask.push_back(i); @@ -66,8 +64,7 @@ void DecodeMOVLHPSMask(unsigned NElts, /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; @@ -83,8 +80,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, } } -void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back(0); ShuffleMask.push_back(1); ShuffleMask.push_back(2); @@ -95,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm, } } -void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = 0; i != 4; ++i) { ShuffleMask.push_back((Imm & 3)); Imm >>= 2; @@ -110,8 +105,7 @@ void DecodePSHUFLWMask(unsigned Imm, /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; @@ -136,7 +130,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -156,7 +150,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. 
AVX defines UNPCK* to operate @@ -174,7 +168,7 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { + SmallVectorImpl<int> &ShuffleMask) { unsigned HalfSize = VT.getVectorNumElements()/2; unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 877c9bd..5b8c6ef 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -24,47 +24,41 @@ namespace llvm { enum { - SM_SentinelZero = ~0U + SM_SentinelZero = -1 }; -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); // <3,1> or <6,7,2,3> -void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); // <0,2> or <0,1,4,5> -void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask); /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. 
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask); void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); + SmallVectorImpl<int> &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 81e9422..ecc7b59 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -24,8 +24,6 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; -class MachineCodeEmitter; -class Target; class X86TargetMachine; /// createX86ISelDag - This pass converts a legalized DAG into a diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 268cbf4..f1cedf3 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,13 +13,13 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "InstPrinter/X86ATTInstPrinter.h" -#include "InstPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 1058df5..a6ed9ba 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -24,11 +24,7 @@ namespace llvm { -class MachineJumpTableInfo; -class MCContext; -class MCInst; class MCStreamer; -class MCSymbol; class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 63c08f1..0cec95a 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -14,9 +14,9 @@ #ifndef X86COFF_MACHINEMODULEINFO_H #define X86COFF_MACHINEMODULEINFO_H +#include "X86MachineFunctionInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/DenseSet.h" -#include "X86MachineFunctionInfo.h" namespace llvm { class X86MachineFunctionInfo; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f90764e..3d63b7e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1779,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) { // Count the number of XMM registers allocated. 
- static const unsigned XMMArgRegs[] = { + static const uint16_t XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 32de194..936df27 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -26,6 +26,7 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" +#include "llvm/InlineAsm.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -37,7 +38,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -570,8 +570,8 @@ void FPS::finishBlockStack() { namespace { struct TableEntry { - unsigned from; - unsigned to; + uint16_t from; + uint16_t to; bool operator<(const TableEntry &TE) const { return from < TE.from; } friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index aa508b8..9405c2f 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -21,7 +21,6 @@ #include "X86TargetMachine.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" -#include "llvm/Support/CFG.h" #include "llvm/Type.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1654,7 +1654,7 @@ enum AtomicSz { AtomicSzEnd }; -static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { +static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { { X86::LOCK_OR8mi, X86::LOCK_OR8mr, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cae9aad..88f3829 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1927,17 +1927,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; // FIXME: We should really autogenerate these arrays - static const unsigned GPR64ArgRegsWin64[] = { + static const uint16_t GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; - static const unsigned GPR64ArgRegs64Bit[] = { + static const uint16_t GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; - static const unsigned XMMArgRegs64Bit[] = { + static const uint16_t XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - const unsigned *GPR64ArgRegs; + const uint16_t *GPR64ArgRegs; unsigned NumXMMRegs = 0; if (IsWin64) { @@ -2326,7 +2326,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. 
- static const unsigned XMMArgRegs[] = { + static const uint16_t XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; @@ -2910,7 +2910,7 @@ static bool isTargetShuffle(unsigned Opcode) { } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, SelectionDAG &DAG) { + SDValue V1, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::MOVSHDUP: @@ -2921,7 +2921,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { + SDValue V1, unsigned TargetMask, + SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PSHUFD: @@ -2933,7 +2934,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) { + SDValue V1, SDValue V2, unsigned TargetMask, + SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PALIGN: @@ -3712,6 +3714,8 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { unsigned NumOps = VT.getVectorNumElements(); + if (VT.getSizeInBits() == 256) + return false; if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -4342,9 +4346,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } +/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the +/// target specific opcode. Returns true if the Mask could be calculated. +/// Sets IsUnary to true if only uses one source. +static bool getTargetShuffleMask(SDNode *N, EVT VT, + SmallVectorImpl<int> &Mask, bool &IsUnary) { + unsigned NumElems = VT.getVectorNumElements(); + SDValue ImmN; + + IsUnary = false; + switch(N->getOpcode()) { + case X86ISD::SHUFP: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; + case X86ISD::UNPCKH: + DecodeUNPCKHMask(VT, Mask); + break; + case X86ISD::UNPCKL: + DecodeUNPCKLMask(VT, Mask); + break; + case X86ISD::MOVHLPS: + DecodeMOVHLPSMask(NumElems, Mask); + break; + case X86ISD::MOVLHPS: + DecodeMOVLHPSMask(NumElems, Mask); + break; + case X86ISD::PSHUFD: + case X86ISD::VPERMILP: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::PSHUFHW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::PSHUFLW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::MOVSS: + case X86ISD::MOVSD: { + // The index 0 always comes from the first element of the second source, + // this is why MOVSS and MOVSD are used in the first place. 
The other + // elements come from the other positions of the first source vector + Mask.push_back(NumElems); + for (unsigned i = 1; i != NumElems; ++i) { + Mask.push_back(i); + } + break; + } + case X86ISD::VPERM2X128: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; + case X86ISD::MOVDDUP: + case X86ISD::MOVLHPD: + case X86ISD::MOVLPD: + case X86ISD::MOVLPS: + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + case X86ISD::PALIGN: + // Not yet implemented + return false; + default: llvm_unreachable("unknown target shuffle node"); + } + + return true; +} + /// getShuffleScalarElt - Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. -static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, +static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. @@ -4355,89 +4431,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) { - Index = SV->getMaskElt(Index); + int Elt = SV->getMaskElt(Index); - if (Index < 0) + if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); - SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0) - : SV->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1); + SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) + : SV->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { unsigned NumElems = VT.getVectorNumElements(); - SmallVector<unsigned, 16> ShuffleMask; + SmallVector<int, 16> ShuffleMask; SDValue ImmN; + bool IsUnary; - switch(Opcode) { - case X86ISD::SHUFP: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::UNPCKH: - DecodeUNPCKHMask(VT, ShuffleMask); - break; - case X86ISD::UNPCKL: - DecodeUNPCKLMask(VT, ShuffleMask); - break; - case X86ISD::MOVHLPS: - DecodeMOVHLPSMask(NumElems, ShuffleMask); - break; - case X86ISD::MOVLHPS: - DecodeMOVLHPSMask(NumElems, ShuffleMask); - break; - case X86ISD::PSHUFD: - case X86ISD::VPERMILP: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PSHUFHW: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PSHUFLW: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::MOVSS: - case X86ISD::MOVSD: { - // The index 0 always comes from the first element of the second source, - // this is why MOVSS and MOVSD are used in the first place. The other - // elements come from the other positions of the first source vector. - unsigned OpNum = (Index == 0) ? 
1 : 0; - return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, - Depth+1); - } - case X86ISD::VPERM2X128: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::MOVDDUP: - case X86ISD::MOVLHPD: - case X86ISD::MOVLPD: - case X86ISD::MOVLPS: - case X86ISD::MOVSHDUP: - case X86ISD::MOVSLDUP: - case X86ISD::PALIGN: - return SDValue(); // Not yet implemented. - default: llvm_unreachable("unknown target shuffle node"); - } - - Index = ShuffleMask[Index]; - if (Index < 0) + if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary)) + return SDValue(); + + int Elt = ShuffleMask[Index]; + if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); - SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0) + SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0) : N->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -4453,7 +4474,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) return (Index == 0) ? V.getOperand(0) - : DAG.getUNDEF(VT.getVectorElementType()); + : DAG.getUNDEF(VT.getVectorElementType()); if (V.getOpcode() == ISD::BUILD_VECTOR) return V.getOperand(Index); @@ -4465,38 +4486,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, /// shuffle operation which come from a consecutively from a zero. The /// search can start in two different directions, from left or right. static -unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems, +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, bool ZerosFromLeft, SelectionDAG &DAG) { - int i = 0; - - while (i < NumElems) { + unsigned i; + for (i = 0; i != NumElems; ++i) { unsigned Index = ZerosFromLeft ? i : NumElems-i-1; - SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0); + SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); if (!(Elt.getNode() && (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) break; - ++i; } return i; } -/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to -/// MaskE correspond consecutively to elements from one of the vector operands, +/// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) +/// correspond consecutively to elements from one of the vector operands, /// starting from its index OpIdx. Also tell OpNum which source vector operand. static -bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE, - int OpIdx, int NumElems, unsigned &OpNum) { +bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, + unsigned MaskI, unsigned MaskE, unsigned OpIdx, + unsigned NumElems, unsigned &OpNum) { bool SeenV1 = false; bool SeenV2 = false; - for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) { + for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) { int Idx = SVOp->getMaskElt(i); // Ignore undef indicies if (Idx < 0) continue; - if (Idx < NumElems) + if (Idx < (int)NumElems) SeenV1 = true; else SeenV2 = true; @@ -4531,7 +4551,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, // if (!isShuffleMaskConsecutive(SVOp, 0, // Mask Start Index - NumElems-NumZeros-1, // Mask End Index + NumElems-NumZeros, // Mask End Index(exclusive) NumZeros, // Where to start looking in the src vector NumElems, // Number of elements in vector OpSrc)) // Which source operand ? 
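For context, the hunks above switch isShuffleMaskConsecutive and its callers from an inclusive mask-end index to a half-open [MaskI, MaskE) range, which is what the "Mask End Index(exclusive)" comments refer to: callers now pass NumElems-NumZeros or NumElems directly as the exclusive end. A minimal standalone sketch of that convention follows; it is illustrative only, the function name and use of std::vector are placeholders and not part of the patch.

#include <vector>

// Check that the mask entries in the half-open range [MaskI, MaskE) map to
// consecutive source elements starting at OpIdx.  MaskE itself is never read.
static bool isConsecutiveRange(const std::vector<int> &Mask,
                               unsigned MaskI, unsigned MaskE,
                               unsigned OpIdx, unsigned NumElems) {
  for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue;                           // undef entry imposes no constraint
    if ((unsigned)Idx % NumElems != OpIdx)
      return false;                       // not the next consecutive element
  }
  return true;
}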
@@ -4564,7 +4584,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, // if (!isShuffleMaskConsecutive(SVOp, NumZeros, // Mask Start Index - NumElems-1, // Mask End Index + NumElems, // Mask End Index(exclusive) 0, // Where to start looking in the src vector NumElems, // Number of elements in vector OpSrc)) // Which source operand ? @@ -6080,88 +6100,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) { return false; } -/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by -/// a vector extract, and if both can be later optimized into a single load. -/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked -/// here because otherwise a target specific shuffle node is going to be -/// emitted for this shuffle, and the optimization not done. -/// FIXME: This is probably not the best approach, but fix the problem -/// until the right path is decided. -static -bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT = V.getValueType(); - ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V); - - // Be sure that the vector shuffle is present in a pattern like this: - // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr) - if (!V.hasOneUse()) - return false; - - SDNode *N = *V.getNode()->use_begin(); - if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return false; - - SDValue EltNo = N->getOperand(1); - if (!isa<ConstantSDNode>(EltNo)) - return false; - - // If the bit convert changed the number of elements, it is unsafe - // to examine the mask. - bool HasShuffleIntoBitcast = false; - if (V.getOpcode() == ISD::BITCAST) { - EVT SrcVT = V.getOperand(0).getValueType(); - if (SrcVT.getVectorNumElements() != VT.getVectorNumElements()) - return false; - V = V.getOperand(0); - HasShuffleIntoBitcast = true; - } - - // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = VT.getVectorNumElements(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); - V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); - - // If we are accessing the upper part of a YMM register - // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of - // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point - // because the legalization of N did not happen yet. - if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256) - return false; - - // Skip one more bit_convert if necessary - if (V.getOpcode() == ISD::BITCAST) { - if (!V.hasOneUse()) - return false; - V = V.getOperand(0); - } - - if (!ISD::isNormalLoad(V.getNode())) - return false; - - // Is the original load suitable? - LoadSDNode *LN0 = cast<LoadSDNode>(V); - - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) - return false; - - if (!HasShuffleIntoBitcast) - return true; - - // If there's a bitcast before the shuffle, check if the load type and - // alignment is valid. 
- unsigned Align = LN0->getAlignment(); - unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) - return false; - - return true; -} - static SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { EVT VT = Op.getValueType(); @@ -6282,12 +6220,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (SVOp->isSplat()) { unsigned NumElem = VT.getVectorNumElements(); int Size = VT.getSizeInBits(); - // Special case, this is the only place now where it's allowed to return - // a vector_shuffle operation without using a target specific node, because - // *hopefully* it will be optimized away by the dag combiner. FIXME: should - // this be moved to DAGCombine instead? - if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) - return Op; // Use vbroadcast whenever the splat comes from a foldable load SDValue LD = isVectorBroadcast(Op, Subtarget); @@ -13005,11 +12937,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target +/// specific shuffle of a load can be folded into a single element load. +/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but +/// shuffles have been customed lowered so we need to handle those here. +static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue InVec = N->getOperand(0); + SDValue EltNo = N->getOperand(1); + + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + + EVT VT = InVec.getValueType(); + + bool HasShuffleIntoBitcast = false; + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); + if (BCVT.getVectorNumElements() != VT.getVectorNumElements()) + return SDValue(); + InVec = InVec.getOperand(0); + HasShuffleIntoBitcast = true; + } + + if (!isTargetShuffle(InVec.getOpcode())) + return SDValue(); + + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + SmallVector<int, 16> ShuffleMask; + bool UnaryShuffle; + if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle)) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; + SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) + : InVec.getOperand(1); + + // If inputs to shuffle are the same for both ops, then allow 2 uses + unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1; + + if (LdNode.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0)) + return SDValue(); + + AllowedUses = 1; // only allow 1 load use if we have a bitcast + LdNode = LdNode.getOperand(0); + } + + if (!ISD::isNormalLoad(LdNode.getNode())) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(LdNode); + + if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + return SDValue(); + + if (HasShuffleIntoBitcast) { + // If there's a bitcast before the shuffle, check if the load type and + // alignment is valid. + unsigned Align = LN0->getAlignment(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) + return SDValue(); + } + + // All checks match so transform back to vector_shuffle so that DAG combiner + // can finish the job + DebugLoc dl = N->getDebugLoc(); + + // Create shuffle node taking into account the case that its a unary shuffle + SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1); + Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl, + InVec.getOperand(0), Shuffle, + &ShuffleMask[0]); + Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, + EltNo); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + TargetLowering::DAGCombinerInfo &DCI) { + SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI); + if (NewOp.getNode()) + return NewOp; + SDValue InputVector = N->getOperand(0); // Only operate on vectors of 4 elements, where the alternative shuffling @@ -13070,6 +13100,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, unsigned EltSize = InputVector.getValueType().getVectorElementType().getSizeInBits()/8; uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -13093,6 +13124,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + + DebugLoc DL = N->getDebugLoc(); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. 
@@ -14897,7 +14930,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index ac49232..42a5014 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)]>; + [(X86ehret GR32:$addr)], IIC_RET>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)]>; + [(X86ehret GR64:$addr)], IIC_RET>; } @@ -193,7 +193,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)]>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -202,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)]>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)]>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -218,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)]>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -226,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "", [(set GR64:$dst, i64immZExt32:$src)]>; + "", [(set GR64:$dst, i64immZExt32:$src)], + IIC_ALU_NONMEM>; // Use sbb to materialize carry bit. let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { @@ -236,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { // FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces // X86CodeEmitter. 
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", - [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>, OpSize; def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; } // isCodeGenOnly @@ -297,32 +302,32 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), // let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", - [(X86rep_movs i8)]>, REP; + [(X86rep_movs i8)], IIC_REP_MOVS>, REP; def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", - [(X86rep_movs i16)]>, REP, OpSize; + [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize; def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", - [(X86rep_movs i32)]>, REP; + [(X86rep_movs i32)], IIC_REP_MOVS>, REP; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", - [(X86rep_movs i64)]>, REP; + [(X86rep_movs i64)], IIC_REP_MOVS>, REP; // FIXME: Should use "(X86rep_stos AL)" as the pattern. let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", - [(X86rep_stos i8)]>, REP; + [(X86rep_stos i8)], IIC_REP_STOS>, REP; let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", - [(X86rep_stos i16)]>, REP, OpSize; + [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize; let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", - [(X86rep_stos i32)]>, REP; + [(X86rep_stos i32)], IIC_REP_STOS>, REP; let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", - [(X86rep_stos i64)]>, REP; + [(X86rep_stos i64)], IIC_REP_STOS>, REP; //===----------------------------------------------------------------------===// @@ -571,7 +576,7 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", - []>, Requires<[In32BitMode]>, LOCK; + [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), @@ -591,72 +596,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), !strconcat("lock\n\t", mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, OpSize, LOCK; + [], IIC_ALU_NONMEM>, OpSize, LOCK; def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, 
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; } @@ -673,29 +678,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "lock\n\t" - "inc{b}\t$dst", []>, LOCK; + "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "lock\n\t" - "inc{w}\t$dst", []>, OpSize, LOCK; + "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "lock\n\t" - "inc{l}\t$dst", []>, LOCK; + "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "lock\n\t" - "inc{q}\t$dst", []>, LOCK; + "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "lock\n\t" - "dec{b}\t$dst", []>, LOCK; + "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "lock\n\t" - "dec{w}\t$dst", []>, OpSize, LOCK; + "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "lock\n\t" - "dec{l}\t$dst", []>, LOCK; + "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins 
i64mem:$dst), "lock\n\t" - "dec{q}\t$dst", []>, LOCK; + "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; } // Atomic compare and swap. @@ -704,42 +709,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)]>, TB, LOCK; + [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], isCodeGenOnly = 1 in def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), "lock\n\t" "cmpxchg16b\t$ptr", - [(X86cas16 addr:$ptr)]>, TB, LOCK, + [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK, Requires<[HasCmpxchg16b]>; let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), "lock\n\t" "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; + [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK; } let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), "lock\n\t" "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; + [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK; } let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), "lock\n\t" "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; + [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK; } let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; + [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK; } // Atomic exchange and add @@ -747,22 +752,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), "lock\n\t" "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, + [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))], + IIC_XADD_LOCK_MEM8>, TB, LOCK; def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), "lock\n\t" "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, + [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))], + IIC_XADD_LOCK_MEM>, TB, OpSize, LOCK; def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), "lock\n\t" "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, + [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))], + IIC_XADD_LOCK_MEM>, TB, LOCK; def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), "lock\n\t" "xadd{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, + [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))], + IIC_XADD_LOCK_MEM>, TB, LOCK; } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 4f9f089..ae3ed1b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 
16; }]>; +// Like 'X86vzload', but always requires 128-bit vector alignment. +def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{ + return cast<MemSDNode>(N)->getAlignment() >= 16; +}]>; + // Like 'load', but always requires 256-bit vector alignment. def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 32; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5a479f0..307c96b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/MC/MCAsmInfo.h" #include <limits> #define GET_INSTRINFO_CTOR @@ -82,6 +82,12 @@ enum { TB_FOLDED_STORE = 1 << 19 }; +struct X86OpTblEntry { + uint16_t RegOp; + uint16_t MemOp; + uint32_t Flags; +}; + X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() ? X86::ADJCALLSTACKDOWN64 @@ -91,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { - static const unsigned OpTbl2Addr[][3] = { + static const X86OpTblEntry OpTbl2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, { X86::ADC32ri8, X86::ADC32mi8, 0 }, { X86::ADC32rr, X86::ADC32mr, 0 }, @@ -259,16 +265,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { - unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1]; - unsigned Flags = OpTbl2Addr[i][2]; + unsigned RegOp = OpTbl2Addr[i].RegOp; + unsigned MemOp = OpTbl2Addr[i].MemOp; + unsigned Flags = OpTbl2Addr[i].Flags; AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, RegOp, MemOp, // Index 0, folded load and store, no alignment requirement. 
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } - static const unsigned OpTbl0[][3] = { + static const X86OpTblEntry OpTbl0[] = { { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, @@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { - unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1]; - unsigned Flags = OpTbl0[i][2]; + unsigned RegOp = OpTbl0[i].RegOp; + unsigned MemOp = OpTbl0[i].MemOp; + unsigned Flags = OpTbl0[i].Flags; AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, RegOp, MemOp, TB_INDEX_0 | Flags); } - static const unsigned OpTbl1[][3] = { + static const X86OpTblEntry OpTbl1[] = { { X86::CMP16rr, X86::CMP16rm, 0 }, { X86::CMP32rr, X86::CMP32rm, 0 }, { X86::CMP64rr, X86::CMP64rm, 0 }, @@ -555,16 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { - unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1]; - unsigned Flags = OpTbl1[i][2]; + unsigned RegOp = OpTbl1[i].RegOp; + unsigned MemOp = OpTbl1[i].MemOp; + unsigned Flags = OpTbl1[i].Flags; AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, RegOp, MemOp, // Index 1, folded load Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } - static const unsigned OpTbl2[][3] = { + static const X86OpTblEntry OpTbl2[] = { { X86::ADC32rr, X86::ADC32rm, 0 }, { X86::ADC64rr, X86::ADC64rm, 0 }, { X86::ADD16rr, X86::ADD16rm, 0 }, @@ -1108,9 +1114,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { - unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1]; - unsigned Flags = OpTbl2[i][2]; + unsigned RegOp = OpTbl2[i].RegOp; + unsigned MemOp = OpTbl2[i].MemOp; + unsigned Flags = OpTbl2[i].Flags; AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, RegOp, MemOp, // Index 2, folded load @@ -3627,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // These are the replaceable SSE instructions. Some of these have Int variants // that we don't include here. We don't want to replace instructions selected // by intrinsics. -static const unsigned ReplaceableInstrs[][3] = { +static const uint16_t ReplaceableInstrs[][3] = { //PackedSingle PackedDouble PackedInt { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, @@ -3667,7 +3673,7 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr } }; -static const unsigned ReplaceableInstrsAVX2[][3] = { +static const uint16_t ReplaceableInstrsAVX2[][3] = { //PackedSingle PackedDouble PackedInt { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, @@ -3688,14 +3694,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = { // FIXME: Some shuffle and unpack instructions have equivalents in different // domains, but they require a bit more work than just switching opcodes. 
-static const unsigned *lookup(unsigned opcode, unsigned domain) { +static const uint16_t *lookup(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) if (ReplaceableInstrs[i][domain-1] == opcode) return ReplaceableInstrs[i]; return 0; } -static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) { +static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) if (ReplaceableInstrsAVX2[i][domain-1] == opcode) return ReplaceableInstrsAVX2[i]; @@ -3718,7 +3724,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); - const unsigned *table = lookup(MI->getOpcode(), dom); + const uint16_t *table = lookup(MI->getOpcode(), dom); if (!table) { // try the other table assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && "256-bit vector operations only available in AVX2"); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d065d2d..b23d756 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -14,10 +14,10 @@ #ifndef X86INSTRUCTIONINFO_H #define X86INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "X86.h" #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "X86GenInstrInfo.inc" diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f585b47..dd7cf50 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1856,19 +1856,19 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; // shld/shrd op,op -> shld op, op, CL -def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>; -def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>; -def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>; -def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>; -def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>; -def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>; - -def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>; -def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>; -def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>; -def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>; -def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>; -def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>; +def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>; +def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>; + +def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>; +def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>; +def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>; +def : 
InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>; /* FIXME: This is disabled because the asm matcher is currently incapable of * matching a fixed immediate like $1. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c6d1d19..df42627 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -641,7 +641,7 @@ let Predicates = [HasAVX] in { (VMOVSDrr (v2i64 (V_SET0)), (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; -// Extract and store. + // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSSmr addr:$dst, @@ -2306,7 +2306,7 @@ let Defs = [EFLAGS] in { "comisd", SSEPackedDouble>, TB, OpSize; } // Defs = [EFLAGS] -// sse12_cmp_packed - sse 1 & 2 compared packed instructions +// sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, string asm, string asm_alt, Domain d> { @@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in { } let Predicates = [HasAVX] in { -def : Pat<(v4i64 (X86vzload addr:$src)), +def : Pat<(v4i64 (alignedX86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; +def : Pat<(v4i64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>; } //===---------------------------------------------------------------------===// @@ -7307,6 +7309,24 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), []>, VEX; } +// Extract and store. +let Predicates = [HasAVX] in { + def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + + def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; +} + +// AVX1 patterns let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; @@ -7314,6 +7334,31 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 
imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; } //===----------------------------------------------------------------------===// @@ -7711,7 +7756,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, VEX_4V; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2], AddedComplexity = 1 in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), (i32 imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, @@ -7756,6 +7801,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; } //===----------------------------------------------------------------------===// @@ -7791,34 +7849,6 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; } -// AVX1 patterns -let Predicates = [HasAVX] in { -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -} - //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 8843848..bddba6c 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -45,17 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; -def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; -def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, +def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, 
TB; +def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB, Requires<[In64BitMode]>; def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; -def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB; -def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB, +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB; +def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB, Requires<[In64BitMode]>; -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize; +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index a7a5c56..b578e8d 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "InstPrinter/X86ATTInstPrinter.h" #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "llvm/Type.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -26,7 +27,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Type.h" using namespace llvm; X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 93e2744..b56025f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "X86.h" #include "X86RegisterInfo.h" +#include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d6d0149..17f4efd 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -114,6 +114,9 @@ def IIC_MOVZX : InstrItinClass; def IIC_MOVZX_R16_R8 : InstrItinClass; def IIC_MOVZX_R16_M8 : InstrItinClass; +def IIC_REP_MOVS : InstrItinClass; +def IIC_REP_STOS : InstrItinClass; + // SSE scalar/parallel binary operations def IIC_SSE_ALU_F32S_RR : InstrItinClass; def IIC_SSE_ALU_F32S_RM : InstrItinClass; @@ -250,6 +253,14 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass; def IIC_SSE_CVT_SD2SI_RM : InstrItinClass; def IIC_SSE_CVT_SD2SI_RR : InstrItinClass; +def IIC_CMPX_LOCK : InstrItinClass; +def IIC_CMPX_LOCK_8 : InstrItinClass; +def IIC_CMPX_LOCK_8B : InstrItinClass; +def IIC_CMPX_LOCK_16B : InstrItinClass; + +def IIC_XADD_LOCK_MEM : InstrItinClass; +def IIC_XADD_LOCK_MEM8 : InstrItinClass; + //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
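Note on the new IIC_* entries just above: they only declare scheduling categories; the actual cycle counts are supplied by per-processor itinerary tables such as the Atom data in the next file. A minimal C++ sketch of that split, using invented class IDs and a flat latency array rather than the real TableGen-generated InstrItineraryData machinery (the 14- and 2-cycle figures simply mirror the Atom entries that follow):

    #include <cstdio>

    // Hypothetical itinerary-class IDs standing in for the generated enum.
    enum ItinClass { IIC_DEFAULT, IIC_CMPX_LOCK, IIC_XADD_LOCK_MEM, NUM_ITIN_CLASSES };

    // Per-processor table: one latency per class (illustrative values taken
    // from the Atom itinerary hunk below).
    static const unsigned AtomLatency[NUM_ITIN_CLASSES] = {
      1,   // IIC_DEFAULT
      14,  // IIC_CMPX_LOCK
      2    // IIC_XADD_LOCK_MEM
    };

    // Instruction definitions name only the class; the scheduler resolves the
    // latency for the selected subtarget.
    static unsigned getInstrLatency(ItinClass IC) { return AtomLatency[IC]; }

    int main() {
      std::printf("lock cmpxchg: %u cycles\n", getInstrLatency(IIC_CMPX_LOCK));
      std::printf("lock xadd:    %u cycles\n", getInstrLatency(IIC_XADD_LOCK_MEM));
      return 0;
    }

This is why the instruction definitions earlier in the patch only gained an IIC_* operand and no numbers: the numbers stay with the processor model.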
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index e8cf72a..77d4e56 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -144,6 +144,9 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >, InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >, + InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >, + // SSE binary operations // arithmetic fp scalar InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >, @@ -289,6 +292,14 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >, InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >, InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >, - InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] > -]>; + InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >, + + InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >, + + InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] > + ]>; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a36d0d8..7fd832b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -14,9 +14,9 @@ #ifndef X86SUBTARGET_H #define X86SUBTARGET_H +#include "llvm/CallingConv.h" #include "llvm/ADT/Triple.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/CallingConv.h" #include <string> #define GET_SUBTARGETINFO_HEADER diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 143caba..8e935af 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -28,7 +28,6 @@ namespace llvm { -class formatted_raw_ostream; class StringRef; class X86TargetMachine : public LLVMTargetMachine { diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index ceb7a4a..a02a368 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -15,7 +15,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - class X86TargetMachine; /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin /// x86-64. 
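For context on what the locked read-modify-write itineraries above are scheduling: ordinary C++ atomics typically lower to exactly these instructions on x86. The snippet below is not from the patch, just a stand-alone illustration of the operations that commonly become lock xadd and lock cmpxchg, the instructions given Atom latencies in the preceding hunk:

    #include <atomic>
    #include <cstdio>

    int main() {
      std::atomic<int> counter{0};

      // fetch_add is normally emitted as "lock xadd" on x86
      // (the IIC_XADD_LOCK_MEM class above).
      int old = counter.fetch_add(5);

      // compare_exchange_strong is normally emitted as "lock cmpxchg"
      // (the IIC_CMPX_LOCK class above).
      int expected = 5;
      bool swapped = counter.compare_exchange_strong(expected, 42);

      std::printf("old=%d swapped=%d final=%d\n", old, (int)swapped, counter.load());
      return 0;
    }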
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 4d8ef74..50fda58 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "XCoreFrameLowering.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMachineFunctionInfo.h" #include "llvm/Function.h" diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index c2d2a5d..593cebc 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1152,7 +1152,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, if (isVarArg) { /* Argument registers */ - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { XCore::R0, XCore::R1, XCore::R2, XCore::R3 }; XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f5a6822..5cd3e67 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -15,9 +15,9 @@ #ifndef XCOREISELLOWERING_H #define XCOREISELLOWERING_H +#include "XCore.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" -#include "XCore.h" namespace llvm { diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index f930623..0a3008d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" +#include "XCoreMachineFunctionInfo.h" #include "XCore.h" #include "llvm/MC/MCContext.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index e47d212..42eeed8 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -14,8 +14,8 @@ #ifndef XCOREINSTRUCTIONINFO_H #define XCOREINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "XCoreRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "XCoreGenInstrInfo.inc" diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 8730282..f3b4b4c 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -14,6 +14,8 @@ #include "XCoreRegisterInfo.h" #include "XCoreMachineFunctionInfo.h" #include "XCore.h" +#include "llvm/Type.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,8 +26,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" -#include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -54,20 +54,6 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } -static const unsigned XCore_ArgRegs[] = { - XCore::R0, XCore::R1, XCore::R2, XCore::R3 -}; - -const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF) -{ - return XCore_ArgRegs; -} - -unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF) -{ - return array_lengthof(XCore_ArgRegs); 
-} - bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { return MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index ab6ce56..7391cfd 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -62,15 +62,6 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; - //! Return the array of argument passing registers - /*! - \note The size of this array is returned by getArgRegsSize(). - */ - static const unsigned *getArgRegs(const MachineFunction *MF = 0); - - //! Return the size of the argument passing register array - static unsigned getNumArgRegs(const MachineFunction *MF = 0); - //! Return whether to emit frame moves static bool needsFrameMoves(const MachineFunction &MF); }; diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2c174f4..2546681 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -14,13 +14,13 @@ #ifndef XCORETARGETMACHINE_H #define XCORETARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" #include "XCoreFrameLowering.h" #include "XCoreSubtarget.h" #include "XCoreInstrInfo.h" #include "XCoreISelLowering.h" #include "XCoreSelectionDAGInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" namespace llvm { |
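The XCore hunks above retype the argument-register array in LowerCCCArguments to uint16_t and drop the getArgRegs()/getNumArgRegs() accessors from XCoreRegisterInfo, which apparently no longer had users. A rough stand-alone C++ sketch of the surviving pattern, a file-local uint16_t register table sized with an array_lengthof-style helper, using invented names (Reg, lowerVarArgs, a local array_lengthof reimplementation) in place of the real XCore classes:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Stand-ins for the generated XCore register numbers (values are arbitrary).
    enum Reg : uint16_t { R0 = 1, R1, R2, R3 };

    // Simplified equivalent of llvm::array_lengthof.
    template <typename T, std::size_t N>
    constexpr std::size_t array_lengthof(const T (&)[N]) { return N; }

    static void lowerVarArgs() {
      // The argument registers live in a small uint16_t table at the use site,
      // matching the shape of the ArgRegs array in LowerCCCArguments.
      static const uint16_t ArgRegs[] = { R0, R1, R2, R3 };
      for (std::size_t i = 0, e = array_lengthof(ArgRegs); i != e; ++i)
        std::printf("vararg slot %zu uses reg %u\n", i, (unsigned)ArgRegs[i]);
    }

    int main() { lowerVarArgs(); return 0; }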
