Diffstat (limited to 'lib/Target')
502 files changed, 19904 insertions, 11593 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 16d0da3..acb57f7 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,4 +1,4 @@
-//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 86aeeb2..b05fe62 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -32,9 +32,15 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
 def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
                                    "Enable VFP3 instructions",
                                    [FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+                                   "Enable VFP4 instructions",
+                                   [FeatureVFP3]>;
 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
                                    "Enable NEON instructions",
                                    [FeatureVFP3]>;
+def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true",
+                                    "Enable Advanced SIMD2 instructions",
+                                    [FeatureNEON]>;
 def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
                                      "Enable Thumb2 instructions">;
 def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
@@ -70,6 +76,8 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
 def FeatureNEONForFP : SubtargetFeature<"neonfp",
                                         "UseNEONForSinglePrecisionFP", "true",
                                         "Use NEON for single precision FP">;
+// Allow more precision in FP computation
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
 
 // Disable 32-bit to 16-bit narrowing for experimentation.
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
@@ -83,6 +91,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                  "Avoid CPSR partial update for OOO execution">;
 
+// Some processors perform return stack prediction. CodeGen should avoid issuing
+// "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+                                     "Has return address stack">;
+
 /// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
 def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
                                         "Supports v7 DSP instructions in Thumb2">;
@@ -198,13 +211,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
 
 // V7a Processors.
 def : Processor<"cortex-a8", CortexA8Itineraries,
                 [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2]>;
+                 FeatureDSPThumb2, FeatureHasRAS]>;
 def : Processor<"cortex-a9", CortexA9Itineraries,
                 [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2]>;
+                 FeatureDSPThumb2, FeatureHasRAS]>;
 def : Processor<"cortex-a9-mp", CortexA9Itineraries,
                 [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2, FeatureMP]>;
+                 FeatureDSPThumb2, FeatureMP,
+                 FeatureHasRAS]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3", [HasV7Ops,
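Each SubtargetFeature above binds a -mattr feature string ("vfp4", "neon2", "ras") to a boolean member of the subtarget, with the bracketed list naming features it implies. A minimal illustrative C++ sketch of the pattern TableGen generates (MySubtarget and its members are hypothetical, not code from this patch):

    // Hypothetical mirror of the "vfp4"/"neon2" feature plumbing: enabling a
    // feature string sets the named bool, and implied features are set too.
    // Accessors like these back the Subtarget->hasVFP4()/hasNEON2() tests
    // used by emitAttributes() later in this patch.
    class MySubtarget {
      bool HasVFPv4;   // "vfp4"; implies "vfp3" via [FeatureVFP3]
      bool HasNEON2;   // "neon2"; implies "neon" via [FeatureNEON]
    public:
      MySubtarget() : HasVFPv4(false), HasNEON2(false) {}
      bool hasVFP4() const { return HasVFPv4; }
      bool hasNEON2() const { return HasNEON2; }
    };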
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 6ae287a..4ec19cc 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -84,6 +84,7 @@ namespace {
     void EmitTextAttribute(unsigned Attribute, StringRef String) {
       switch (Attribute) {
+      default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
       case ARMBuildAttrs::CPU_name:
         Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
         break;
@@ -92,7 +93,6 @@ namespace {
       case ARMBuildAttrs::VFP_arch:
         Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
         break;
-      default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
       }
     }
     void Finish() { }
@@ -196,6 +196,7 @@ namespace {
         AttributeItemType item = Contents[i];
         Streamer.EmitULEB128IntValue(item.Tag, 0);
         switch (item.Type) {
+        default: llvm_unreachable("Invalid attribute type");
         case AttributeItemType::NumericAttribute:
           Streamer.EmitULEB128IntValue(item.IntValue, 0);
           break;
@@ -203,8 +204,6 @@ namespace {
           Streamer.EmitBytes(item.StringValue.upper(), 0);
           Streamer.EmitIntValue(0, 1); // '\0'
           break;
-        default:
-          assert(0 && "Invalid attribute type");
         }
       }
 
@@ -299,6 +298,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
   OutStreamer.EmitLabel(CurrentFnSym);
 }
 
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  assert(Size && "C++ constructor pointer had zero size!");
+
+  const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+  assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+  const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+                                            (Subtarget->isTargetDarwin()
+                                             ? MCSymbolRefExpr::VK_None
+                                             : MCSymbolRefExpr::VK_ARM_TARGET1),
+                                            OutContext);
+
+  OutStreamer.EmitValue(E, Size);
+}
+
 /// runOnMachineFunction - This uses the EmitInstruction()
 /// method to print assembly for each instruction.
 ///
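One pattern recurs throughout this patch: assert(0 && "...") becomes llvm_unreachable("..."). A self-contained sketch of why (Kind and widthOf are illustrative, not from this patch; llvm_unreachable comes from llvm/Support/ErrorHandling.h):

    #include "llvm/Support/ErrorHandling.h"

    enum Kind { Byte, Half, Word };   // illustrative only

    static int widthOf(Kind K) {
      switch (K) {   // no default: adding an enumerator triggers a warning here
      case Byte: return 1;
      case Half: return 2;
      case Word: return 4;
      }
      llvm_unreachable("Unknown Kind!"); // aborts with the message in asserts
                                         // builds; in release builds it is an
                                         // optimizer hint, so no dead
                                         // fall-through return is needed
    }

The same reasoning explains the hunks below that delete a `default:` from a fully covered switch and put llvm_unreachable after the switch instead.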
@@ -315,8 +330,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   unsigned TF = MO.getTargetFlags();
 
   switch (MO.getType()) {
-  default:
-    assert(0 && "<unknown operand type>");
+  default: llvm_unreachable("<unknown operand type>");
   case MachineOperand::MO_Register: {
     unsigned Reg = MO.getReg();
     assert(TargetRegisterInfo::isPhysicalRegister(Reg));
@@ -585,10 +599,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
     OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
 
   // Emit ARM Build Attributes
-  if (Subtarget->isTargetELF()) {
-
+  if (Subtarget->isTargetELF())
     emitAttributes();
-  }
 }
 
@@ -719,15 +731,25 @@ void ARMAsmPrinter::emitAttributes() {
 
   if (Subtarget->hasNEON() && emitFPU) {
     /* NEON is not exactly a VFP architecture, but GAS emit one of
-     * neon/vfpv3/vfpv2 for .fpu parameters */
-    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+     * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+    if (Subtarget->hasNEON2())
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4");
+    else
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
     /* If emitted for NEON, omit from VFP below, since you can have both
      * NEON and VFP in build attributes but only one .fpu */
     emitFPU = false;
   }
 
+  /* VFPv4 + .fpu */
+  if (Subtarget->hasVFP4()) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+                               ARMBuildAttrs::AllowFPv4A);
+    if (emitFPU)
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
   /* VFPv3 + .fpu */
-  if (Subtarget->hasVFP3()) {
+  } else if (Subtarget->hasVFP3()) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
                                ARMBuildAttrs::AllowFPv3A);
     if (emitFPU)
@@ -817,7 +839,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
 static MCSymbolRefExpr::VariantKind
 getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
   switch (Modifier) {
-  default: llvm_unreachable("Unknown modifier!");
   case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
   case ARMCP::TLSGD:       return MCSymbolRefExpr::VK_ARM_TLSGD;
   case ARMCP::TPOFF:       return MCSymbolRefExpr::VK_ARM_TPOFF;
@@ -825,7 +846,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
   case ARMCP::GOT:         return MCSymbolRefExpr::VK_ARM_GOT;
   case ARMCP::GOTOFF:      return MCSymbolRefExpr::VK_ARM_GOTOFF;
   }
-  return MCSymbolRefExpr::VK_None;
+  llvm_unreachable("Invalid ARMCPModifier!");
 }
 
 MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
@@ -1093,7 +1114,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     switch (Opc) {
     default:
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
    case ARM::tPUSH:
       // Special case here: no src & dst reg, but two extra imp ops.
       StartOp = 2; NumOffset = 2;
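The emitAttributes() hunks above establish a priority order for the single .fpu directive: NEON2 beats NEON, and VFPv4 is tried before VFPv3. As a plain-function sketch of that decision order (selectFPUName is an illustrative helper, not part of the patch):

    // Mirrors the hunk's .fpu selection: NEON variants win because NEON and
    // VFP can both appear in build attributes but only one .fpu is emitted.
    const char *selectFPUName(bool HasNEON2, bool HasNEON,
                              bool HasVFP4, bool HasVFP3) {
      if (HasNEON) return HasNEON2 ? "neon-vfpv4" : "neon";
      if (HasVFP4) return "vfpv4";
      if (HasVFP3) return "vfpv3";
      return "vfpv2";  // assumption: VFP2-only fallback
    }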
@@ -1108,6 +1129,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
       break;
     case ARM::STR_PRE_IMM:
     case ARM::STR_PRE_REG:
+    case ARM::t2STR_PRE:
       assert(MI->getOperand(2).getReg() == ARM::SP &&
              "Only stack pointer as a source reg is supported");
       RegList.push_back(SrcReg);
@@ -1121,14 +1143,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
       switch (Opc) {
       default:
         MI->dump();
-        assert(0 && "Unsupported opcode for unwinding information");
+        llvm_unreachable("Unsupported opcode for unwinding information");
       case ARM::MOVr:
+      case ARM::tMOVr:
         Offset = 0;
         break;
       case ARM::ADDri:
         Offset = -MI->getOperand(2).getImm();
         break;
       case ARM::SUBri:
+      case ARM::t2SUBri:
         Offset = MI->getOperand(2).getImm();
         break;
       case ARM::tSUBspi:
@@ -1166,16 +1190,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         OutStreamer.EmitPad(Offset);
       } else {
         MI->dump();
-        assert(0 && "Unsupported opcode for unwinding information");
+        llvm_unreachable("Unsupported opcode for unwinding information");
       }
     } else if (DstReg == ARM::SP) {
       // FIXME: .movsp goes here
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     }
     else {
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     }
   }
 }
@@ -1204,7 +1228,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   // Check for manual lowerings.
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
-  case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+  case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
   case ARM::DBG_VALUE: {
     if (isVerbose() && OutStreamer.hasRawTextSupport()) {
       SmallString<128> TmpStr;
@@ -1319,6 +1343,60 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
+  case ARM::BMOVPCBr9_CALL:
+  case ARM::BMOVPCB_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Add 's' bit operand (always reg0 for this)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::Bcc);
+      const GlobalValue *GV = MI->getOperand(0).getGlobal();
+      MCSymbol *GVSym = Mang->getSymbol(GV);
+      const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
+  case ARM::t2BMOVPCBr9_CALL:
+  case ARM::t2BMOVPCB_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tMOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::t2B);
+      const GlobalValue *GV = MI->getOperand(0).getGlobal();
+      MCSymbol *GVSym = Mang->getSymbol(GV);
+      const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
   case ARM::MOVi16_ga_pcrel:
   case ARM::t2MOVi16_ga_pcrel: {
     MCInst TmpInst;
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7741fc4..4b276c5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -73,6 +73,7 @@ public:
   virtual void EmitFunctionEntryLabel();
   void EmitStartOfAsmFile(Module &M);
   void EmitEndOfAsmFile(Module &M);
+  void EmitXXStructor(const Constant *CV);
 
   // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8bf5475..75b796e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -156,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   unsigned OffImm = MI->getOperand(NumOps-2).getImm();
   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
   switch (AddrMode) {
-  default:
-    assert(false && "Unknown indexed op!");
-    return NULL;
+  default: llvm_unreachable("Unknown indexed op!");
   case ARMII::AddrMode2: {
     bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     unsigned Amt = ARM_AM::getAM2Offset(OffImm);
@@ -505,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
 bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
-  // FIXME: This confuses implicit_def with optional CPSR def.
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
-    return false;
-
   bool Found = false;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }
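DefinesPredicate now also recognizes register-mask operands, which this patch introduces for calls (see the CSR_* lists added to ARMCallingConv.td below). A hedged sketch of how such an operand is queried (callClobbersCPSR is illustrative; clobbersPhysReg() is true for every register the mask does not preserve):

    // Illustrative only: scan a call's operands for a register mask that
    // clobbers CPSR. Calls carry a single MO_RegisterMask operand generated
    // from the calling convention's callee-saved list.
    bool callClobbersCPSR(const MachineInstr &Call) {
      for (unsigned i = 0, e = Call.getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = Call.getOperand(i);
        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
          return true;
      }
      return false;
    }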
@@ -558,85 +552,84 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   if (MCID.getSize())
     return MCID.getSize();
 
-    // If this machine instr is an inline asm, measure it.
-    if (MI->getOpcode() == ARM::INLINEASM)
-      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
-    if (MI->isLabel())
-      return 0;
-    unsigned Opc = MI->getOpcode();
-    switch (Opc) {
-    case TargetOpcode::IMPLICIT_DEF:
-    case TargetOpcode::KILL:
-    case TargetOpcode::PROLOG_LABEL:
-    case TargetOpcode::EH_LABEL:
-    case TargetOpcode::DBG_VALUE:
-      return 0;
-    case TargetOpcode::BUNDLE:
-      return getInstBundleLength(MI);
-    case ARM::MOVi16_ga_pcrel:
-    case ARM::MOVTi16_ga_pcrel:
-    case ARM::t2MOVi16_ga_pcrel:
-    case ARM::t2MOVTi16_ga_pcrel:
-      return 4;
-    case ARM::MOVi32imm:
-    case ARM::t2MOVi32imm:
-      return 8;
-    case ARM::CONSTPOOL_ENTRY:
-      // If this machine instr is a constant pool entry, its size is recorded as
-      // operand #2.
-      return MI->getOperand(2).getImm();
-    case ARM::Int_eh_sjlj_longjmp:
-      return 16;
-    case ARM::tInt_eh_sjlj_longjmp:
-      return 10;
-    case ARM::Int_eh_sjlj_setjmp:
-    case ARM::Int_eh_sjlj_setjmp_nofp:
-      return 20;
-    case ARM::tInt_eh_sjlj_setjmp:
-    case ARM::t2Int_eh_sjlj_setjmp:
-    case ARM::t2Int_eh_sjlj_setjmp_nofp:
-      return 12;
-    case ARM::BR_JTr:
-    case ARM::BR_JTm:
-    case ARM::BR_JTadd:
-    case ARM::tBR_JTr:
-    case ARM::t2BR_JT:
-    case ARM::t2TBB_JT:
-    case ARM::t2TBH_JT: {
-      // These are jumptable branches, i.e. a branch followed by an inlined
-      // jumptable. The size is 4 + 4 * number of entries. For TBB, each
-      // entry is one byte; TBH two byte each.
-      unsigned EntrySize = (Opc == ARM::t2TBB_JT)
-        ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-      unsigned NumOps = MCID.getNumOperands();
-      MachineOperand JTOP =
-        MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
-      unsigned JTI = JTOP.getIndex();
-      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-      assert(MJTI != 0);
-      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-      assert(JTI < JT.size());
-      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-      // 4 aligned. The assembler / linker may add 2 byte padding just before
-      // the JT entries. The size does not include this padding; the
-      // constant islands pass does separate bookkeeping for it.
-      // FIXME: If we know the size of the function is less than (1 << 16) *2
-      // bytes, we can use 16-bit entries instead. Then there won't be an
-      // alignment issue.
-      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
-      unsigned NumEntries = getNumJTEntries(JT, JTI);
-      if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
-        // Make sure the instruction that follows TBB is 2-byte aligned.
-        // FIXME: Constant island pass should insert an "ALIGN" instruction
-        // instead.
-        ++NumEntries;
-      return NumEntries * EntrySize + InstSize;
-    }
-    default:
-      // Otherwise, pseudo-instruction sizes are zero.
-      return 0;
-    }
-    return 0; // Not reached
+  // If this machine instr is an inline asm, measure it.
+  if (MI->getOpcode() == ARM::INLINEASM)
+    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+  if (MI->isLabel())
+    return 0;
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::DBG_VALUE:
+    return 0;
+  case TargetOpcode::BUNDLE:
+    return getInstBundleLength(MI);
+  case ARM::MOVi16_ga_pcrel:
+  case ARM::MOVTi16_ga_pcrel:
+  case ARM::t2MOVi16_ga_pcrel:
+  case ARM::t2MOVTi16_ga_pcrel:
+    return 4;
+  case ARM::MOVi32imm:
+  case ARM::t2MOVi32imm:
+    return 8;
+  case ARM::CONSTPOOL_ENTRY:
+    // If this machine instr is a constant pool entry, its size is recorded as
+    // operand #2.
+    return MI->getOperand(2).getImm();
+  case ARM::Int_eh_sjlj_longjmp:
+    return 16;
+  case ARM::tInt_eh_sjlj_longjmp:
+    return 10;
+  case ARM::Int_eh_sjlj_setjmp:
+  case ARM::Int_eh_sjlj_setjmp_nofp:
+    return 20;
+  case ARM::tInt_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp_nofp:
+    return 12;
+  case ARM::BR_JTr:
+  case ARM::BR_JTm:
+  case ARM::BR_JTadd:
+  case ARM::tBR_JTr:
+  case ARM::t2BR_JT:
+  case ARM::t2TBB_JT:
+  case ARM::t2TBH_JT: {
+    // These are jumptable branches, i.e. a branch followed by an inlined
+    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+    // entry is one byte; TBH two byte each.
+    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+    unsigned NumOps = MCID.getNumOperands();
+    MachineOperand JTOP =
+      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+    unsigned JTI = JTOP.getIndex();
+    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+    assert(MJTI != 0);
+    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+    assert(JTI < JT.size());
+    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+    // 4 aligned. The assembler / linker may add 2 byte padding just before
+    // the JT entries. The size does not include this padding; the
+    // constant islands pass does separate bookkeeping for it.
+    // FIXME: If we know the size of the function is less than (1 << 16) *2
+    // bytes, we can use 16-bit entries instead. Then there won't be an
+    // alignment issue.
+    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+    unsigned NumEntries = getNumJTEntries(JT, JTI);
+    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+      // Make sure the instruction that follows TBB is 2-byte aligned.
+      // FIXME: Constant island pass should insert an "ALIGN" instruction
+      // instead.
+      ++NumEntries;
+    return NumEntries * EntrySize + InstSize;
+  }
+  default:
+    // Otherwise, pseudo-instruction sizes are zero.
+    return 0;
+  }
 }
 
 unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
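The jump-table size formula in that hunk deserves a worked example (jumpTableBytes is an illustrative helper, not part of the patch):

    // Size of an inlined jump table: the branch itself plus one entry per
    // target. For TBB an odd entry count is rounded up so the following
    // instruction stays 2-byte aligned.
    unsigned jumpTableBytes(unsigned NumEntries, unsigned EntrySize,
                            unsigned InstSize, bool IsTBB) {
      if (IsTBB && (NumEntries & 1))
        ++NumEntries;                 // one byte of padding for odd TBB tables
      return NumEntries * EntrySize + InstSize;
    }
    // A t2TBB_JT with 5 entries: jumpTableBytes(5, 1, 4, true) == 10 bytes.
    // A t2TBH_JT with 5 entries: jumpTableBytes(5, 2, 4, false) == 14 bytes.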
@@ -765,8 +758,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       break;
   case 16:
     if (ARM::QPRRegClass.hasSubClassEq(RC)) {
-      if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+      // Use aligned spills if the stack can be realigned.
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                        .addFrameIndex(FI).addImm(16)
                        .addReg(SrcReg, getKillRegState(isKill))
                        .addMemOperand(MMO));
@@ -851,7 +845,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::VST1q64Pseudo:
+  case ARM::VST1q64:
     if (MI->getOperand(0).isFI() &&
         MI->getOperand(2).getSubReg() == 0) {
       FrameIndex = MI->getOperand(0).getIndex();
@@ -914,8 +908,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     break;
   case 16:
     if (ARM::QPRRegClass.hasSubClassEq(RC)) {
-      if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                        .addFrameIndex(FI).addImm(16)
                        .addMemOperand(MMO));
       } else {
@@ -937,11 +931,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
         AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                        .addFrameIndex(FI))
                        .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
-      MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
     }
   } else
     llvm_unreachable("Unknown reg class!");
@@ -952,15 +945,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                      .addFrameIndex(FI))
                      .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
-      MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
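On the RegState change above: if I read MachineInstrBuilder.h of this era correctly, DefineNoRead is shorthand for Define plus Undef. Defining one D-subregister of a Q/QQ register is a partial write, which would otherwise count as a read of the stale wide register; Undef records that none of the old value is read, which is what the deleted explicit implicit-def of DestReg used to approximate. A one-line sketch (subRegLoadDefFlags is illustrative):

    // Assumption: llvm/CodeGen/MachineInstrBuilder.h defines
    // RegState::DefineNoRead == RegState::Define | RegState::Undef.
    static unsigned subRegLoadDefFlags() {
      return RegState::Define | RegState::Undef;  // i.e. DefineNoRead
    }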
@@ -997,7 +989,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::VLD1q64Pseudo:
+  case ARM::VLD1q64:
     if (MI->getOperand(1).isFI() &&
         MI->getOperand(0).getSubReg() == 0) {
       FrameIndex = MI->getOperand(1).getIndex();
@@ -1406,7 +1398,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
   // saves compile time, because it doesn't require every single
   // stack slot reference to depend on the instruction that does the
   // modification.
-  if (MI->definesRegister(ARM::SP))
+  // Calls don't actually change the stack pointer, even if they have imp-defs.
+  // No ARM calling conventions change the stack pointer. (X86 calling
+  // conventions sometimes do).
+  if (!MI->isCall() && MI->definesRegister(ARM::SP))
     return true;
 
   return false;
@@ -1471,13 +1466,12 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
 int llvm::getMatchingCondBranchOpcode(int Opc) {
   if (Opc == ARM::B)
     return ARM::Bcc;
-  else if (Opc == ARM::tB)
+  if (Opc == ARM::tB)
     return ARM::tBcc;
-  else if (Opc == ARM::t2B)
-    return ARM::t2Bcc;
+  if (Opc == ARM::t2B)
+    return ARM::t2Bcc;
 
   llvm_unreachable("Unknown unconditional branch opcode!");
-  return 0;
 }
 
@@ -1650,7 +1644,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   }
   default:
     llvm_unreachable("Unsupported addressing mode!");
-    break;
   }
 
   Offset += InstrOffs * Scale;
@@ -1801,6 +1794,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
     for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
       const MachineOperand &MO = Instr.getOperand(IO);
+      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
+        return false;
       if (!MO.isReg()) continue;
 
       // This instruction modifies or uses CPSR after the one we want to
@@ -1862,6 +1857,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
     for (unsigned IO = 0, EO = Instr.getNumOperands();
          !isSafe && IO != EO; ++IO) {
       const MachineOperand &MO = Instr.getOperand(IO);
+      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+        isSafe = true;
+        break;
+      }
       if (!MO.isReg() || MO.getReg() != ARM::CPSR)
         continue;
       if (MO.isDef()) {
@@ -2012,7 +2011,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   switch (Opc) {
   default:
     llvm_unreachable("Unexpected multi-uops instruction!");
-    break;
   case ARM::VLDMQIA:
   case ARM::VSTMQIA:
     return 2;
@@ -2583,9 +2581,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD2DUPd8:
     case ARM::VLD2DUPd16:
     case ARM::VLD2DUPd32:
-    case ARM::VLD2DUPd8_UPD:
-    case ARM::VLD2DUPd16_UPD:
-    case ARM::VLD2DUPd32_UPD:
+    case ARM::VLD2DUPd8wb_fixed:
+    case ARM::VLD2DUPd16wb_fixed:
+    case ARM::VLD2DUPd32wb_fixed:
+    case ARM::VLD2DUPd8wb_register:
+    case ARM::VLD2DUPd16wb_register:
+    case ARM::VLD2DUPd32wb_register:
     case ARM::VLD4DUPd8:
     case ARM::VLD4DUPd16:
     case ARM::VLD4DUPd32:
@@ -2693,33 +2694,33 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   if (DefAlign < 8 && Subtarget.isCortexA9())
     switch (DefMCID.getOpcode()) {
     default: break;
-    case ARM::VLD1q8Pseudo:
-    case ARM::VLD1q16Pseudo:
-    case ARM::VLD1q32Pseudo:
-    case ARM::VLD1q64Pseudo:
-    case ARM::VLD1q8PseudoWB_register:
-    case ARM::VLD1q16PseudoWB_register:
-    case ARM::VLD1q32PseudoWB_register:
-    case ARM::VLD1q64PseudoWB_register:
-    case ARM::VLD1q8PseudoWB_fixed:
-    case ARM::VLD1q16PseudoWB_fixed:
-    case ARM::VLD1q32PseudoWB_fixed:
-    case ARM::VLD1q64PseudoWB_fixed:
-    case ARM::VLD2d8Pseudo:
-    case ARM::VLD2d16Pseudo:
-    case ARM::VLD2d32Pseudo:
+    case ARM::VLD1q8:
+    case ARM::VLD1q16:
+    case ARM::VLD1q32:
+    case ARM::VLD1q64:
+    case ARM::VLD1q8wb_register:
+    case ARM::VLD1q16wb_register:
+    case ARM::VLD1q32wb_register:
+    case ARM::VLD1q64wb_register:
+    case ARM::VLD1q8wb_fixed:
+    case ARM::VLD1q16wb_fixed:
+    case ARM::VLD1q32wb_fixed:
+    case ARM::VLD1q64wb_fixed:
+    case ARM::VLD2d8:
+    case ARM::VLD2d16:
+    case ARM::VLD2d32:
     case ARM::VLD2q8Pseudo:
     case ARM::VLD2q16Pseudo:
     case ARM::VLD2q32Pseudo:
-    case ARM::VLD2d8PseudoWB_fixed:
-    case ARM::VLD2d16PseudoWB_fixed:
-    case ARM::VLD2d32PseudoWB_fixed:
+    case ARM::VLD2d8wb_fixed:
+    case ARM::VLD2d16wb_fixed:
+    case ARM::VLD2d32wb_fixed:
     case ARM::VLD2q8PseudoWB_fixed:
     case ARM::VLD2q16PseudoWB_fixed:
     case ARM::VLD2q32PseudoWB_fixed:
-    case ARM::VLD2d8PseudoWB_register:
-    case ARM::VLD2d16PseudoWB_register:
-    case ARM::VLD2d32PseudoWB_register:
+    case ARM::VLD2d8wb_register:
+    case ARM::VLD2d16wb_register:
+    case ARM::VLD2d32wb_register:
     case ARM::VLD2q8PseudoWB_register:
     case ARM::VLD2q16PseudoWB_register:
     case ARM::VLD2q32PseudoWB_register:
@@ -2767,9 +2768,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD2DUPd8Pseudo:
     case ARM::VLD2DUPd16Pseudo:
     case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8Pseudo_UPD:
-    case ARM::VLD2DUPd16Pseudo_UPD:
-    case ARM::VLD2DUPd32Pseudo_UPD:
+    case ARM::VLD2DUPd8PseudoWB_fixed:
+    case ARM::VLD2DUPd16PseudoWB_fixed:
+    case ARM::VLD2DUPd32PseudoWB_fixed:
+    case ARM::VLD2DUPd8PseudoWB_register:
+    case ARM::VLD2DUPd16PseudoWB_register:
+    case ARM::VLD2DUPd32PseudoWB_register:
     case ARM::VLD4DUPd8Pseudo:
     case ARM::VLD4DUPd16Pseudo:
     case ARM::VLD4DUPd32Pseudo:
@@ -2848,7 +2852,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned Class = MCID.getSchedClass();
   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
     // When predicated, CPSR is an additional source operand for CPSR updating
     // instructions, this apparently increases their latencies.
     *PredCost = 1;
@@ -2997,3 +3001,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   // This will go before any implicit ops.
   AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
 }
+
+bool ARMBaseInstrInfo::hasNOP() const {
+  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 68e8208..314e317 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -35,6 +35,9 @@ protected:
   explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
 
 public:
+  // Return whether the target has an explicit NOP encoding.
+  bool hasNOP() const;
+
   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
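The new hasNOP() is a plain feature-bit test: subtarget features are kept as a bitmask, and an architectural NOP exists from v6T2 onward, so the query reduces to one AND. Sketch (hasArchitecturalNOP is illustrative; ARM::HasV6T2Ops is the TableGen-generated bit the patch itself tests):

    // Illustrative standalone form of the hasNOP() test above. Later
    // architectures imply HasV6T2Ops, so this also holds for v7 cores.
    bool hasArchitecturalNOP(uint64_t FeatureBits) {
      return (FeatureBits & ARM::HasV6T2Ops) != 0;
    }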
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 8ee6ce2..d2aff9a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -61,28 +61,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
     BasePtr(ARM::R6) {
 }
 
-const unsigned*
+const uint16_t*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  static const unsigned CalleeSavedRegs[] = {
-    ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
-    ARM::R7, ARM::R6,  ARM::R5,  ARM::R4,
-
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-
-  static const unsigned DarwinCalleeSavedRegs[] = {
-    // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
-    // register.
-    ARM::LR,  ARM::R7,  ARM::R6, ARM::R5, ARM::R4,
-    ARM::R11, ARM::R10, ARM::R8,
+  return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+}
 
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-  return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+  return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
 }
 
 BitVector ARMBaseRegisterInfo::
@@ -93,7 +79,6 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
-  Reserved.set(ARM::FPSCR);
   if (TFI->hasFP(MF))
     Reserved.set(FramePtr);
   if (hasBasePointer(MF))
@@ -135,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
   return false;
 }
 
-const TargetRegisterClass *
-ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
-                                              const TargetRegisterClass *B,
-                                              unsigned SubIdx) const {
-  switch (SubIdx) {
-  default: return 0;
-  case ARM::ssub_0:
-  case ARM::ssub_1:
-  case ARM::ssub_2:
-  case ARM::ssub_3: {
-    // S sub-registers.
-    if (A->getSize() == 8) {
-      if (B == &ARM::SPR_8RegClass)
-        return &ARM::DPR_8RegClass;
-      assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
-      if (A == &ARM::DPR_8RegClass)
-        return A;
-      return &ARM::DPR_VFP2RegClass;
-    }
-
-    if (A->getSize() == 16) {
-      if (B == &ARM::SPR_8RegClass)
-        return &ARM::QPR_8RegClass;
-      return &ARM::QPR_VFP2RegClass;
-    }
-
-    if (A->getSize() == 32) {
-      if (B == &ARM::SPR_8RegClass)
-        return 0;  // Do not allow coalescing!
-      return &ARM::QQPR_VFP2RegClass;
-    }
-
-    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
-    return 0;  // Do not allow coalescing!
-  }
-  case ARM::dsub_0:
-  case ARM::dsub_1:
-  case ARM::dsub_2:
-  case ARM::dsub_3: {
-    // D sub-registers.
-    if (A->getSize() == 16) {
-      if (B == &ARM::DPR_VFP2RegClass)
-        return &ARM::QPR_VFP2RegClass;
-      if (B == &ARM::DPR_8RegClass)
-        return 0;  // Do not allow coalescing!
-      return A;
-    }
-
-    if (A->getSize() == 32) {
-      if (B == &ARM::DPR_VFP2RegClass)
-        return &ARM::QQPR_VFP2RegClass;
-      if (B == &ARM::DPR_8RegClass)
-        return 0;  // Do not allow coalescing!
-      return A;
-    }
-
-    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
-    if (B != &ARM::DPRRegClass)
-      return 0;  // Do not allow coalescing!
-    return A;
-  }
-  case ARM::dsub_4:
-  case ARM::dsub_5:
-  case ARM::dsub_6:
-  case ARM::dsub_7: {
-    // D sub-registers of QQQQ registers.
-    if (A->getSize() == 64 && B == &ARM::DPRRegClass)
-      return A;
-    return 0;  // Do not allow coalescing!
-  }
-
-  case ARM::qsub_0:
-  case ARM::qsub_1: {
-    // Q sub-registers.
-    if (A->getSize() == 32) {
-      if (B == &ARM::QPR_VFP2RegClass)
-        return &ARM::QQPR_VFP2RegClass;
-      if (B == &ARM::QPR_8RegClass)
-        return 0;  // Do not allow coalescing!
-      return A;
-    }
-
-    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
-    if (B == &ARM::QPRRegClass)
-      return A;
-    return 0;  // Do not allow coalescing!
-  }
-  case ARM::qsub_2:
-  case ARM::qsub_3: {
-    // Q sub-registers of QQQQ registers.
-    if (A->getSize() == 64 && B == &ARM::QPRRegClass)
-      return A;
-    return 0;  // Do not allow coalescing!
-  }
-  }
-  return 0;
-}
-
 bool
 ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
                                           SmallVectorImpl<unsigned> &SubIndices,
@@ -403,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
 
 /// getRawAllocationOrder - Returns the register allocation order for a
 /// specified register class with a target-dependent hint.
-ArrayRef<unsigned>
+ArrayRef<uint16_t>
 ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
                                            unsigned HintType, unsigned HintReg,
                                            const MachineFunction &MF) const {
@@ -412,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
   // of register pairs.
 
   // No FP, R9 is available.
-  static const unsigned GPREven1[] = {
+  static const uint16_t GPREven1[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
     ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
     ARM::R9, ARM::R11
   };
-  static const unsigned GPROdd1[] = {
+  static const uint16_t GPROdd1[] = {
    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
    ARM::R8, ARM::R10
   };
 
   // FP is R7, R9 is available.
-  static const unsigned GPREven2[] = {
+  static const uint16_t GPREven2[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
     ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
     ARM::R9, ARM::R11
   };
-  static const unsigned GPROdd2[] = {
+  static const uint16_t GPROdd2[] = {
     ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
     ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
     ARM::R8, ARM::R10
   };
 
   // FP is R11, R9 is available.
-  static const unsigned GPREven3[] = {
+  static const uint16_t GPREven3[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
     ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
     ARM::R9
   };
-  static const unsigned GPROdd3[] = {
+  static const uint16_t GPROdd3[] = {
     ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
     ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
     ARM::R8
   };
 
   // No FP, R9 is not available.
-  static const unsigned GPREven4[] = {
+  static const uint16_t GPREven4[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
     ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
     ARM::R11
   };
-  static const unsigned GPROdd4[] = {
+  static const uint16_t GPROdd4[] = {
    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
    ARM::R10
   };
 
   // FP is R7, R9 is not available.
-  static const unsigned GPREven5[] = {
+  static const uint16_t GPREven5[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R10,
     ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
     ARM::R11
   };
-  static const unsigned GPROdd5[] = {
+  static const uint16_t GPROdd5[] = {
     ARM::R1, ARM::R3, ARM::R5, ARM::R11,
     ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
     ARM::R10
   };
 
   // FP is R11, R9 is not available.
-  static const unsigned GPREven6[] = {
+  static const uint16_t GPREven6[] = {
     ARM::R0, ARM::R2, ARM::R4, ARM::R6,
     ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
   };
-  static const unsigned GPROdd6[] = {
+  static const uint16_t GPROdd6[] = {
     ARM::R1, ARM::R3, ARM::R5, ARM::R7,
     ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
   };
 
@@ -597,11 +484,16 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
 bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
   if (!EnableBasePointer)
     return false;
 
-  if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+  // When outgoing call frames are so large that we adjust the stack pointer
+  // around the call, we can no longer use the stack pointer to reach the
+  // emergency spill slot.
+  if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() ||
+                                    !TFI->hasReservedCallFrame(MF)))
     return true;
 
   // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -626,13 +518,28 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 
 bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   // We can't realign the stack if:
   // 1. Dynamic stack realignment is explicitly disabled,
   // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
   // 3. There are VLAs in the function and the base pointer is disabled.
-  return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
-          (!MFI->hasVarSizedObjects() || EnableBasePointer));
+  if (!MF.getTarget().Options.RealignStack)
+    return false;
+  if (AFI->isThumb1OnlyFunction())
+    return false;
+  // Stack realignment requires a frame pointer. If we already started
+  // register allocation with frame pointer elimination, it is too late now.
+  if (!MRI->canReserveReg(FramePtr))
+    return false;
+  // We may also need a base pointer if there are dynamic allocas.
+  if (!MFI->hasVarSizedObjects())
+    return true;
+  if (!EnableBasePointer)
+    return false;
+  // A base pointer is required and allowed. Check that it isn't too late to
+  // reserve it.
+  return MRI->canReserveReg(BasePtr);
 }
 
 bool ARMBaseRegisterInfo::
@@ -640,7 +547,7 @@ needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   const Function *F = MF.getFunction();
   unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
-  bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+  bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
                                F->hasFnAttr(Attribute::StackAlignment));
 
   return requiresRealignment && canRealignStack(MF);
@@ -666,12 +573,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
 
 unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
   llvm_unreachable("What is the exception register");
-  return 0;
 }
 
 unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
   llvm_unreachable("What is the exception handler register");
-  return 0;
 }
 
 unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
@@ -879,7 +784,7 @@ int64_t ARMBaseRegisterInfo::
 getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
   const MCInstrDesc &Desc = MI->getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-  int64_t InstrOffs = 0;;
+  int64_t InstrOffs = 0;
   int Scale = 1;
   unsigned ImmIdx = 0;
   switch (AddrMode) {
@@ -920,7 +825,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
   }
   default:
     llvm_unreachable("Unsupported addressing mode!");
-    break;
   }
 
   return InstrOffs * Scale;
@@ -1116,7 +1020,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
     break;
   default:
     llvm_unreachable("Unsupported addressing mode!");
-    break;
   }
 
   Offset += getFrameIndexInstrOffset(MI, i);
@@ -1158,6 +1061,21 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
 
+  // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+  // call frame setup/destroy instructions have already been eliminated. That
+  // means the stack pointer cannot be used to access the emergency spill slot
+  // when !hasReservedCallFrame().
+#ifndef NDEBUG
+  if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+    assert(TFI->hasReservedCallFrame(MF) &&
+           "Cannot use SP to access the emergency spill slot in "
+           "functions without a reserved call frame");
+    assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+           "Cannot use SP to access the emergency spill slot in "
+           "functions with variable sized frame objects");
+  }
+#endif // NDEBUG
+
   // Special handling of dbg_value instructions.
   if (MI.isDebugValue()) {
     MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index fee17ff..af79351 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -35,7 +35,7 @@ namespace ARMRI {
 
 /// isARMArea1Register - Returns true if the register is a low register (r0-r7)
 /// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
   using namespace ARM;
   switch (Reg) {
     case R0:  case R1:  case R2:  case R3:
@@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
     case LR:  case SP:  case PC:
       return true;
     case R8:  case R9:  case R10: case R11:
-      // For darwin we want r7 and lr to be next to each other.
-      return !isDarwin;
+      // For iOS we want r7 and lr to be next to each other.
+      return !isIOS;
     default:
       return false;
   }
 }
 
-static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
   using namespace ARM;
   switch (Reg) {
     case R8: case R9: case R10: case R11:
-      // Darwin has this second area.
-      return isDarwin;
+      // iOS has this second area.
+      return isIOS;
     default:
       return false;
   }
 }
 
-static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
   using namespace ARM;
   switch (Reg) {
     case D15: case D14: case D13: case D12:
@@ -94,17 +94,11 @@ protected:
 
 public:
   /// Code Generation virtual methods...
-  const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint32_t *getCallPreservedMask(CallingConv::ID) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  /// getMatchingSuperRegClass - Return a subclass of the specified register
-  /// class A so that each register in it has a sub-register of the
-  /// specified sub-register index which is in the specified register class B.
-  virtual const TargetRegisterClass *
-  getMatchingSuperRegClass(const TargetRegisterClass *A,
-                           const TargetRegisterClass *B, unsigned Idx) const;
-
   /// canCombineSubRegIndices - Given a register class and a list of
   /// subregister indices, return true if it's possible to combine the
   /// subregister indices into one that corresponds to a larger
@@ -125,7 +119,7 @@ public:
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const;
 
-  ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+  ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
                                            unsigned HintType, unsigned HintReg,
                                            const MachineFunction &MF) const;
 
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index 69eddf0..11bd6a4 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -1,4 +1,4 @@
-//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index ff7db1f..437b4c7 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,4 +1,4 @@
-//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index c22a08e..d33364b 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -164,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
                                  S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+                                     (sequence "D%u", 15, 8))>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
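These CalleeSavedRegs definitions replace the hand-written arrays deleted from ARMBaseRegisterInfo.cpp above: TableGen now emits both CSR_AAPCS_SaveList/CSR_iOS_SaveList (zero-terminated uint16_t arrays in prologue spill order) and CSR_AAPCS_RegMask/CSR_iOS_RegMask (the masks returned by getCallPreservedMask and attached to call instructions). A usage sketch for the save-list side (printCalleeSaves is illustrative, assuming llvm/Support/raw_ostream.h):

    // Walk the zero-terminated callee-saved list the way prologue/epilogue
    // insertion does; on iOS this yields LR, R7, R6, R5, R4, R11, ...
    void printCalleeSaves(const TargetRegisterInfo *TRI,
                          const MachineFunction &MF) {
      for (const uint16_t *R = TRI->getCalleeSavedRegs(&MF); *R; ++R)
        outs() << TRI->getName(*R) << "\n";
    }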
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 365f0bb..e48d07a 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -74,7 +74,7 @@ namespace {
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+    uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
 
     bool runOnMachineFunction(MachineFunction &MF);
 
@@ -199,6 +199,8 @@ namespace {
       unsigned Op) const { return 0; }
     unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
       const { return 0; }
+    unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
     unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
       const { return 0; }
     unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
@@ -421,7 +423,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
   case ARM_AM::ror:
   case ARM_AM::rrx: return 3;
   }
-  return 0;
 }
 
 /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
@@ -550,7 +551,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
   switch (MI.getDesc().TSFlags & ARMII::FormMask) {
   default: {
     llvm_unreachable("Unhandled instruction encoding format!");
-    break;
   }
   case ARMII::MiscFrm:
     if (MI.getOpcode() == ARM::LEApcrelJT) {
@@ -559,7 +559,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
       break;
     }
     llvm_unreachable("Unhandled instruction encoding!");
-    break;
   case ARMII::Pseudo:
     emitPseudoInstruction(MI);
     break;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 2039d41..2cdfd1e 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -52,6 +52,7 @@ static cl::opt<bool>
 AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
           cl::desc("Adjust basic block layout to better use TB[BH]"));
 
+// FIXME: This option should be removed once it has received sufficient testing.
 static cl::opt<bool>
 AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
           cl::desc("Align constant islands in code"));
@@ -194,14 +195,23 @@ namespace {
       MachineInstr *MI;
       MachineInstr *CPEMI;
       MachineBasicBlock *HighWaterMark;
+    private:
       unsigned MaxDisp;
+    public:
       bool NegOk;
       bool IsSoImm;
+      bool KnownAlignment;
       CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
              bool neg, bool soimm)
-        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+          KnownAlignment(false) {
         HighWaterMark = CPEMI->getParent();
       }
+      /// getMaxDisp - Returns the maximum displacement supported by MI.
+      /// Correct for unknown alignment.
+      unsigned getMaxDisp() const {
+        return KnownAlignment ? MaxDisp : MaxDisp - 2;
+      }
     };
 
   /// CPUsers - Keep track of all of the machine instructions that use various
@@ -299,6 +309,7 @@ namespace {
     bool FixUpConditionalBr(ImmBranch &Br);
     bool FixUpUnconditionalBr(ImmBranch &Br);
     bool UndoLRSpillRestore();
+    bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
     bool OptimizeThumb2Instructions();
     bool OptimizeThumb2Branches();
     bool ReorderThumb2JumpTables();
@@ -308,6 +319,7 @@ namespace {
     void ComputeBlockSize(MachineBasicBlock *MBB);
     unsigned GetOffsetOf(MachineInstr *MI) const;
+    unsigned GetUserOffset(CPUser&) const;
     void dumpBBs();
     void verify();
 
@@ -316,7 +328,7 @@ namespace {
     bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
                          const CPUser &U) {
       return OffsetIsInRange(UserOffset, TrialOffset,
-                             U.MaxDisp, U.NegOk, U.IsSoImm);
+                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
     }
   };
   char ARMConstantIslands::ID = 0;
@@ -335,11 +347,9 @@ void ARMConstantIslands::verify() {
   }
   for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
     CPUser &U = CPUsers[i];
-    unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
-    unsigned CPEOffset  = GetOffsetOf(U.CPEMI);
-    unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
-      UserOffset - CPEOffset;
-    assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+    unsigned UserOffset = GetUserOffset(U);
+    assert(CPEIsInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(), U.NegOk) &&
+           "Constant pool entry out of range!");
   }
 #endif
 }
@@ -434,7 +444,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
     for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
       CPChange |= HandleConstantPoolUser(i);
     if (CPChange && ++NoCPIters > 30)
-      llvm_unreachable("Constant Island pass failed to converge!");
+      report_fatal_error("Constant Island pass failed to converge!");
     DEBUG(dumpBBs());
 
     // Clear NewWaterList now. If we split a block for branches, it should
@@ -446,7 +456,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
     for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
       BRChange |= FixUpImmediateBr(ImmBranches[i]);
     if (BRChange && ++NoBRIters > 30)
-      llvm_unreachable("Branch Fix Up pass failed to converge!");
+      report_fatal_error("Branch Fix Up pass failed to converge!");
     DEBUG(dumpBBs());
 
     if (!CPChange && !BRChange)
@@ -536,7 +546,7 @@ ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
 
     // Ensure that future entries with higher alignment get inserted before
     // CPEMI. This is bucket sort with iterators.
-    for (unsigned a = LogAlign + 1; a < MaxAlign; ++a)
+    for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = CPEMI;
 
@@ -545,7 +555,8 @@ ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
     CPEs.push_back(CPEntry(CPEMI, i));
     CPEntries.push_back(CPEs);
     ++NumCPEs;
-    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
+    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+                 << Size << ", align = " << Align <<'\n');
   }
   DEBUG(BB->dump());
 }
@@ -719,7 +730,6 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
         switch (Opc) {
         default:
           llvm_unreachable("Unknown addressing mode for CP reference!");
-          break;
 
         // Taking the address of a CP entry.
         case ARM::LEApcrel:
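getMaxDisp() is the knob that makes inline asm safe for this pass: when the mod-4 alignment of the user instruction is unknown, the usable displacement shrinks by the worst-case 2-byte rounding instead of being adjusted ad hoc at every call site. A worked example (maxDisp is an illustrative standalone copy; 4095 is the imm12-style range of a Thumb2 literal load, an assumption for the example):

    // Same arithmetic as CPUser::getMaxDisp() above.
    unsigned maxDisp(unsigned MaxDisp, bool KnownAlignment) {
      return KnownAlignment ? MaxDisp : MaxDisp - 2;
    }
    // maxDisp(4095, true) == 4095; maxDisp(4095, false) == 4093, leaving
    // room for the hardware's round-down of offsets that are 2 mod 4.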
@@ -795,6 +805,9 @@ void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
     // The actual size may be smaller, but still a multiple of the instr size.
     if (I->isInlineAsm())
       BBI.Unalign = isThumb ? 1 : 2;
+    // Also consider instructions that may be shrunk later.
+    else if (isThumb && mayOptimizeThumb2Instruction(I))
+      BBI.Unalign = 1;
   }
 
   // tBR_JTr contains a .align 2 directive.
@@ -816,11 +829,11 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
   unsigned Offset = BBInfo[MBB->getNumber()].Offset;
 
   // Sum instructions before MI in MBB.
-  for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
     assert(I != MBB->end() && "Didn't find MI in its own basic block?");
-    if (&*I == MI) return Offset;
     Offset += TII->GetInstSizeInBytes(I);
   }
+  return Offset;
 }
 
 /// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
@@ -923,34 +936,39 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
   return NewBB;
 }
 
+/// GetUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const {
+  unsigned UserOffset = GetOffsetOf(U.MI);
+  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+  unsigned KnownBits = BBI.internalKnownBits();
+
+  // The value read from PC is offset from the actual instruction address.
+  UserOffset += (isThumb ? 4 : 8);
+
+  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+  // Make sure U.getMaxDisp() returns a constrained range.
+  U.KnownAlignment = (KnownBits >= 2);
+
+  // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+  // purposes of the displacement computation; compensate for that here.
+  // For unknown alignments, getMaxDisp() constrains the range instead.
+  if (isThumb && U.KnownAlignment)
+    UserOffset &= ~3u;
+
+  return UserOffset;
+}
+
 /// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
 /// reference) is within MaxDisp of TrialOffset (a proposed location of a
 /// constant pool entry).
+/// UserOffset is computed by GetUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
 bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
                                          unsigned TrialOffset, unsigned MaxDisp,
                                          bool NegativeOK, bool IsSoImm) {
-  // On Thumb offsets==2 mod 4 are rounded down by the hardware for
-  // purposes of the displacement computation; compensate for that here.
-  // Effectively, the valid range of displacements is 2 bytes smaller for such
-  // references.
-  unsigned TotalAdj = 0;
-  if (isThumb && UserOffset%4 !=0) {
-    UserOffset -= 2;
-    TotalAdj = 2;
-  }
-  // CPEs will be rounded up to a multiple of 4.
-  if (isThumb && TrialOffset%4 != 0) {
-    TrialOffset += 2;
-    TotalAdj += 2;
-  }
-
-  // In Thumb2 mode, later branch adjustments can shift instructions up and
-  // cause alignment change. In the worst case scenario this can cause the
-  // user's effective address to be subtracted by 2 and the CPE's address to
-  // be plus 2.
-  if (isThumb2 && TotalAdj != 4)
-    MaxDisp -= (4 - TotalAdj);
-
   if (UserOffset <= TrialOffset) {
     // User before the Trial.
if (TrialOffset - UserOffset <= MaxDisp) @@ -1049,14 +1067,22 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { #endif // NDEBUG void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { - for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. // Include the alignment of the current block. unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); - // This is where block i begins. + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + BBInfo[i].Offset = Offset; BBInfo[i].KnownBits = KnownBits; } @@ -1092,7 +1118,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. - if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1107,7 +1133,8 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement @@ -1208,8 +1235,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + - // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for - // inside OffsetIsInRange. + // Thumb2, 2 on Thumb1. if (BBHasFallthrough(UserMBB)) { // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; @@ -1262,7 +1288,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); unsigned UPad = UnknownPadding(LogAlign, KnownBits); - unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); @@ -1343,9 +1369,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC. - unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + // Compute this only once, it's expensive. + unsigned UserOffset = GetUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. 
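The interplay between GetUserOffset and getMaxDisp above is easiest to see in isolation. A minimal sketch of the offset model, distilled from the code in this hunk (the helper name is invented; the real version also refreshes U.KnownAlignment from the block's known bits):

static unsigned hardwarePCOffset(unsigned InstOffset, bool isThumb,
                                 bool Align4Known) {
  // The value read from PC is ahead of the instruction's own address:
  // 4 bytes on Thumb, 8 on ARM.
  InstOffset += isThumb ? 4 : 8;
  // Thumb rounds offsets == 2 (mod 4) down for displacement purposes.
  // When the alignment mod 4 is unknown (inline asm earlier in the block),
  // the rounding cannot be applied; getMaxDisp() compensates by shrinking
  // the usable range by 2 instead.
  if (isThumb && Align4Known)
    InstOffset &= ~3u;
  return InstOffset;
}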
@@ -1652,6 +1677,25 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } +// mayOptimizeThumb2Instruction - Returns true if OptimizeThumb2Instructions +// below may shrink MI. +bool +ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { + switch(MI->getOpcode()) { + // OptimizeThumb2Instructions. + case ARM::t2LEApcrel: + case ARM::t2LDRpci: + // OptimizeThumb2Branches. + case ARM::t2B: + case ARM::t2Bcc: + case ARM::tBcc: + // OptimizeThumb2JumpTables. + case ARM::t2BR_JT: + return true; + } + return false; +} + bool ARMConstantIslands::OptimizeThumb2Instructions() { bool MadeChange = false; @@ -1683,8 +1727,13 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { if (!NewOpc) continue; - unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned UserOffset = GetUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + + // Be conservative with inline asm. + if (!U.KnownAlignment) + MaxOffs -= 2; + // FIXME: Check if offset is multiple of scale if scale is not 4. if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); @@ -1741,6 +1790,11 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { if (Opcode != ARM::tBcc) continue; + // If the conditional branch doesn't kill CPSR, then CPSR can be liveout + // so this transformation is not safe. + if (!Br.MI->killsRegister(ARM::CPSR)) + continue; + NewOpc = 0; unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 9576283..fa3226e 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===// // // The LLVM Compiler Infrastructure // @@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {} const char *ARMConstantPoolValue::getModifierText() const { switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); // FIXME: Are these case sensitive? It'd be nice to lower-case all the // strings if that's legal. 
case ARMCP::no_modifier: return "none"; @@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const { case ARMCP::GOTTPOFF: return "gottpoff"; case ARMCP::TPOFF: return "tpoff"; } + llvm_unreachable("Unknown modifier!"); } int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - assert(false && "Shouldn't be calling this directly!"); - return -1; + llvm_unreachable("Shouldn't be calling this directly!"); } void diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 0d0def3..6b98d44 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp index 51e68b4..f671317 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.cpp +++ b/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { case ARM::reloc_arm_machine_cp_entry: case ARM::reloc_arm_jt_base: case ARM::reloc_arm_pic_jt: - assert(0 && "unsupported ARM relocation type"); break; - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break; + llvm_unreachable("unsupported ARM relocation type"); + + case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; + case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; + case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; default: - llvm_unreachable("unknown ARM relocation type"); break; + llvm_unreachable("unknown ARM relocation type"); } - return 0; } long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, long int Modifier) const { - assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " + "implemented"); } unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); } bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented"); - return 1; + llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); } unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " + "implemented"); } long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::computeRelocation() not implemented"); } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 01d772d..c4ab99d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1,4 +1,4 @@ -//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +//===-- ARMExpandPseudoInsts.cpp
- Expand pseudo instructions -------------===// // // The LLVM Compiler Infrastructure // @@ -148,25 +148,16 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,false,SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false}, - -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,true}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false}, +{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, @@ -179,16 +170,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, 
-{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, @@ -283,19 +264,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, @@ -307,16 +275,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, 
false, true, true, SingleSpc, 2, 8 ,false}, - { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, @@ -631,6 +589,8 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); } @@ -837,7 +797,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::eh_sjlj_dispatchsetup: { + case ARM::Int_eh_sjlj_dispatchsetup: + case ARM::Int_eh_sjlj_dispatchsetup_nofp: + case ARM::tInt_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); @@ -1024,6 +986,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1054,6 +1017,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1085,33 +1049,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8PseudoWB_register: - case ARM::VLD1q16PseudoWB_register: - case ARM::VLD1q32PseudoWB_register: - case ARM::VLD1q64PseudoWB_register: - case ARM::VLD1q8PseudoWB_fixed: - case ARM::VLD1q16PseudoWB_fixed: - case ARM::VLD1q32PseudoWB_fixed: - case ARM::VLD1q64PseudoWB_fixed: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8PseudoWB_fixed: - case ARM::VLD2d16PseudoWB_fixed: - case ARM::VLD2d32PseudoWB_fixed: case ARM::VLD2q8PseudoWB_fixed: case ARM::VLD2q16PseudoWB_fixed: case ARM::VLD2q32PseudoWB_fixed: - case ARM::VLD2d8PseudoWB_register: - case ARM::VLD2d16PseudoWB_register: - case ARM::VLD2d32PseudoWB_register: case ARM::VLD2q8PseudoWB_register: case ARM::VLD2q16PseudoWB_register: case ARM::VLD2q32PseudoWB_register: @@ -1159,9 +1102,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD2DUPd8PseudoWB_fixed: + case ARM::VLD2DUPd16PseudoWB_fixed: + case ARM::VLD2DUPd32PseudoWB_fixed: + case ARM::VLD2DUPd8PseudoWB_register: + case ARM::VLD2DUPd16PseudoWB_register: + case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: @@ -1177,33 +1123,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandVLD(MBBI); return true; - case ARM::VST1q8Pseudo: - case ARM::VST1q16Pseudo: - case ARM::VST1q32Pseudo: - case ARM::VST1q64Pseudo: - case 
ARM::VST1q8PseudoWB_fixed: - case ARM::VST1q16PseudoWB_fixed: - case ARM::VST1q32PseudoWB_fixed: - case ARM::VST1q64PseudoWB_fixed: - case ARM::VST1q8PseudoWB_register: - case ARM::VST1q16PseudoWB_register: - case ARM::VST1q32PseudoWB_register: - case ARM::VST1q64PseudoWB_register: - case ARM::VST2d8Pseudo: - case ARM::VST2d16Pseudo: - case ARM::VST2d32Pseudo: case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8PseudoWB_fixed: - case ARM::VST2d16PseudoWB_fixed: - case ARM::VST2d32PseudoWB_fixed: case ARM::VST2q8PseudoWB_fixed: case ARM::VST2q16PseudoWB_fixed: case ARM::VST2q32PseudoWB_fixed: - case ARM::VST2d8PseudoWB_register: - case ARM::VST2d16PseudoWB_register: - case ARM::VST2d32PseudoWB_register: case ARM::VST2q8PseudoWB_register: case ARM::VST2q16PseudoWB_register: case ARM::VST2q32PseudoWB_register: @@ -1321,15 +1246,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } - - return false; } bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a98dfc3..818b202 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -157,14 +157,16 @@ class ARMFastISel : public FastISel { bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); - bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); - bool SelectSIToFP(const Instruction *I); - bool SelectFPToSI(const Instruction *I); - bool SelectSDiv(const Instruction *I); - bool SelectSRem(const Instruction *I); + bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode); + bool SelectIToFP(const Instruction *I, bool isSigned); + bool SelectFPToI(const Instruction *I, bool isSigned); + bool SelectDiv(const Instruction *I, bool isSigned); + bool SelectRem(const Instruction *I, bool isSigned); bool SelectCall(const Instruction *I, const char *IntrMemName); bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); @@ -299,10 +301,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -319,11 +321,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { 
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); @@ -342,12 +344,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -366,11 +368,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); @@ -388,11 +390,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm)); @@ -411,12 +413,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -434,10 +436,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(Imm)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -453,10 +455,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addImm(Imm1).addImm(Imm2)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(Imm1).addImm(Imm2)); 
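// (Each FastEmitInst_* variant follows the same shape seen here: if the
// target instruction defines a register, build it with ResultReg as the
// explicit def; otherwise build it bare and COPY the implicit result into
// ResultReg, so callers always get a virtual register back.)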
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -473,9 +475,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DL, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); return ResultReg; } @@ -486,7 +489,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::VMOVRS), MoveReg) + TII.get(ARM::VMOVSR), MoveReg) .addReg(SrcReg)); return MoveReg; } @@ -496,7 +499,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::VMOVSR), MoveReg) + TII.get(ARM::VMOVRS), MoveReg) .addReg(SrcReg)); return MoveReg; } @@ -617,40 +620,65 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // TODO: Need more magic for ARM PIC. if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; - // MachineConstantPool wants an explicit alignment. - unsigned Align = TD.getPrefTypeAlignment(GV->getType()); - if (Align == 0) { - // TODO: Figure out if this is correct. - Align = TD.getTypeAllocSize(GV->getType()); - } - - // Grab index. - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, - ARMCP::CPValue, - PCAdj); - unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); - - // Load value. - MachineInstrBuilder MIB; unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - if (isThumb2) { - unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) - .addConstantPoolIndex(Idx); - if (RelocM == Reloc::PIC_) - MIB.addImm(Id); + + // Use movw+movt when possible, it avoids constant pool entries. + // Darwin targets don't support movt with Reloc::Static, see + // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support + // static movt relocations. + if (Subtarget->useMovt() && + Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) { + unsigned Opc; + switch (RelocM) { + case Reloc::PIC_: + Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel; + break; + case Reloc::DynamicNoPIC: + Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn; + break; + default: + Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; + break; + } + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg).addGlobalAddress(GV)); } else { - // The extra immediate is for addrmode2. - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), - DestReg) - .addConstantPoolIndex(Idx) - .addImm(0); + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(GV->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(GV->getType()); + } + + // Grab index. + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : + (Subtarget->isThumb() ? 
4 : 8); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id, + ARMCP::CPValue, + PCAdj); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + // Load value. + MachineInstrBuilder MIB; + if (isThumb2) { + unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx); + if (RelocM == Reloc::PIC_) + MIB.addImm(Id); + } else { + // The extra immediate is for addrmode2. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), + DestReg) + .addConstantPoolIndex(Idx) + .addImm(0); + } + AddOptionalDefs(MIB); } - AddOptionalDefs(MIB); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { + MachineInstrBuilder MIB; unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -700,7 +728,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { // This will get lowered later into the correct offsets and registers // via rewriteXFrameIndex. if (SI != FuncInfo.StaticAllocaMap.end()) { - TargetRegisterClass* RC = TLI.getRegClassFor(VT); + const TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -844,16 +872,6 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { } } - // Materialize the global variable's address into a reg which can - // then be used later to load the variable. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { - unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType())); - if (Tmp == 0) return false; - - Addr.Base.Reg = Tmp; - return true; - } - // Try to get this in a register if nothing else has worked. if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); return Addr.Base.Reg != 0; @@ -865,9 +883,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { bool needsLowering = false; switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Unhandled load/store type!"); - break; + default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: case MVT::i16: @@ -895,8 +911,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass + : ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -971,7 +987,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Opc; bool useAM3 = false; bool needVMOV = false; - TargetRegisterClass *RC; + const TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. 
default: return false; @@ -1336,6 +1352,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } +bool ARMFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) return false; + + unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + .addReg(AddrReg)); + return true; +} + bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); @@ -1416,14 +1442,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // We have i1, i8, or i16, we need to either zero extend or sign extend. if (needsExt) { - unsigned ResultReg; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (ResultReg == 0) return false; - SrcReg1 = ResultReg; + SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); + if (SrcReg1 == 0) return false; if (!UseImm) { - ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (ResultReg == 0) return false; - SrcReg2 = ResultReg; + SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); + if (SrcReg2 == 0) return false; } } @@ -1467,8 +1490,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass - : ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass + : ARM::GPRRegisterClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); @@ -1520,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { return true; } -bool ARMFastISel::SelectSIToFP(const Instruction *I) { +bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1540,9 +1563,9 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { EVT DestVT = MVT::i32; - unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, /*isZExt*/ false); - if (ResultReg == 0) return false; - SrcReg = ResultReg; + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + /*isZExt*/!isSigned); + if (SrcReg == 0) return false; } // The conversion routine works on fp-reg to fp-reg and the operand above @@ -1551,8 +1574,8 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (FP == 0) return false; unsigned Opc; - if (Ty->isFloatTy()) Opc = ARM::VSITOS; - else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; + if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; + else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); @@ -1563,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { return true; } -bool ARMFastISel::SelectFPToSI(const Instruction *I) { +bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1577,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); - if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; - else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; + if (OpTy->isFloatTy()) Opc = isSigned ? 
ARM::VTOSIZS : ARM::VTOUIZS; + else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; - // f64->s32 or f32->s32 both need an intermediate f32 reg. + // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(OpReg)); @@ -1656,7 +1679,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { return true; } -bool ARMFastISel::SelectSDiv(const Instruction *I) { +bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1670,21 +1693,21 @@ // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SDIV_I8; + LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8; else if (VT == MVT::i16) - LC = RTLIB::SDIV_I16; + LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16; else if (VT == MVT::i32) - LC = RTLIB::SDIV_I32; + LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32; else if (VT == MVT::i64) - LC = RTLIB::SDIV_I64; + LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64; else if (VT == MVT::i128) - LC = RTLIB::SDIV_I128; + LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported DIV!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectSRem(const Instruction *I) { +bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1692,21 +1715,59 @@ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SREM_I8; + LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8; else if (VT == MVT::i16) - LC = RTLIB::SREM_I16; + LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16; else if (VT == MVT::i32) - LC = RTLIB::SREM_I32; + LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32; else if (VT == MVT::i64) - LC = RTLIB::SREM_I64; + LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64; else if (VT == MVT::i128) - LC = RTLIB::SREM_I128; + LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported REM!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { +bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { + EVT DestVT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we have a binary operation on a non-legal + // type and the target independent selector doesn't know how to handle it. + if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) + return false; + + unsigned Opc; + switch (ISDOpcode) { + default: return false; + case ISD::ADD: + Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr; + break; + case ISD::OR: + Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr; + break; + case ISD::SUB: + Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr; + break; + } + + unsigned SrcReg1 = getRegForValue(I->getOperand(0)); + if (SrcReg1 == 0) return false; + + // TODO: Often the 2nd operand is an immediate, which can be encoded directly + // in the instruction, rather than materializing the value in a register.
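// (One hypothetical shape for that folding -- the opcode-lookup and emit
// helpers named here are invented for illustration and are not part of
// this change:
//   if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)))
//     if (unsigned ImmOpc = getImmFormOpcode(Opc, CI->getValue()))
//       return emitBinaryRI(I, ImmOpc, SrcReg1, CI->getZExtValue());
// )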
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1)); + if (SrcReg2 == 0) return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(SrcReg1).addReg(SrcReg2)); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { EVT VT = TLI.getValueType(I->getType(), true); // We can get here in the case when we want to use NEON for our fp @@ -1814,10 +1875,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, case CCValAssign::Full: break; case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); - unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, - /*isZExt*/false); - assert (ResultReg != 0 && "Failed to emit a sext"); - Arg = ResultReg; + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); + assert (Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } @@ -1825,10 +1884,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); - unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, - /*isZExt*/true); - assert (ResultReg != 0 && "Failed to emit a sext"); - Arg = ResultReg; + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); + assert (Arg != 0 && "Failed to emit a sext"); ArgVT = DestVT; break; } @@ -1898,7 +1955,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // For this move we copy into two registers and then move into the // double fp reg we want. EVT DestVT = RVLocs[0].getValVT(); - TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); + const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVDRR), ResultReg) @@ -1918,7 +1975,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) CopyVT = MVT::i32; - TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); + const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); unsigned ResultReg = createResultReg(DstRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1980,15 +2037,14 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) return false; - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - assert(DestVT == MVT::i32 && "ARM should always ext to i32"); - bool isZExt = Outs[0].Flags.isZExt(); - unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt); - if (ResultReg == 0) return false; - SrcReg = ResultReg; + // Perform extension if flagged as either zext or sext. Otherwise, do + // nothing. + if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { + SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt()); + if (SrcReg == 0) return false; + } } // Make the copy. @@ -2012,12 +2068,12 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - // Darwin needs the r9 versions of the opcodes. - bool isDarwin = Subtarget->isTargetDarwin(); + // iOS needs the r9 versions of the opcodes. + bool isiOS = Subtarget->isTargetIOS(); if (isThumb2) { - return isDarwin ? ARM::tBLr9 : ARM::tBL; + return isiOS ? 
ARM::tBLr9 : ARM::tBL; } else { - return isDarwin ? ARM::BLr9 : ARM::BL; + return isiOS ? ARM::BLr9 : ARM::BL; } } @@ -2076,7 +2132,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. + // Issue the call, BLr9 for iOS, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); @@ -2095,6 +2151,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -2147,10 +2207,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, SmallVector<unsigned, 8> ArgRegs; SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgRegs.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); + unsigned arg_size = CS.arg_size(); + Args.reserve(arg_size); + ArgRegs.reserve(arg_size); + ArgVTs.reserve(arg_size); + ArgFlags.reserve(arg_size); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a memory intrinsic instead of a regular call, skip the @@ -2197,7 +2258,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. + // Issue the call, BLr9 for iOS, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); @@ -2226,6 +2287,10 @@ bool ARMFastISel::SelectCall(const Instruction *I, for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. 
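// (Sketched, the interesting FinishCall case: an f64 returned in r0/r1
// under the soft-float ABI is reassembled with
//   BuildMI(..., TII.get(ARM::VMOVDRR), ResultReg)
//     .addReg(RVLocs[0].getLocReg()).addReg(RVLocs[1].getLocReg());
// and both GPRs are recorded in UsedRegs so setPhysRegsDeadExcept() can
// mark the remaining call-clobbered registers dead.)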
SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -2260,9 +2325,10 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) bool RV; unsigned ResultReg; RV = ARMEmitLoad(VT, ResultReg, Src); - assert (RV = true && "Should be able to handle this load."); + assert (RV == true && "Should be able to handle this load."); RV = ARMEmitStore(VT, ResultReg, Dest); - assert (RV = true && "Should be able to handle this store."); + assert (RV == true && "Should be able to handle this store."); + (void)RV; unsigned Size = VT.getSizeInBits()/8; Len -= Size; @@ -2325,7 +2391,6 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } } - return false; } bool ARMFastISel::SelectTrunc(const Instruction *I) { @@ -2427,6 +2492,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return SelectStore(I); case Instruction::Br: return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); case Instruction::ICmp: case Instruction::FCmp: return SelectCmp(I); @@ -2435,19 +2502,33 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::FPTrunc: return SelectFPTrunc(I); case Instruction::SIToFP: - return SelectSIToFP(I); + return SelectIToFP(I, /*isSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*isSigned*/ false); case Instruction::FPToSI: - return SelectFPToSI(I); + return SelectFPToI(I, /*isSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*isSigned*/ false); + case Instruction::Add: + return SelectBinaryIntOp(I, ISD::ADD); + case Instruction::Or: + return SelectBinaryIntOp(I, ISD::OR); + case Instruction::Sub: + return SelectBinaryIntOp(I, ISD::SUB); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return SelectBinaryFPOp(I, ISD::FADD); case Instruction::FSub: - return SelectBinaryOp(I, ISD::FSUB); + return SelectBinaryFPOp(I, ISD::FSUB); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return SelectBinaryFPOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectSDiv(I); + return SelectDiv(I, /*isSigned*/ true); + case Instruction::UDiv: + return SelectDiv(I, /*isSigned*/ false); case Instruction::SRem: - return SelectSRem(I); + return SelectRem(I, /*isSigned*/ true); + case Instruction::URem: + return SelectRem(I, /*isSigned*/ false); case Instruction::Call: if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) return SelectIntrinsicCall(*II); @@ -2514,12 +2595,12 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { - // Completely untested on non-darwin. + // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. 
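// More precisely, after this change: iOS targets only, and any mode except
// Thumb1 -- the guard below tests isTargetIOS() && !isThumb1Only().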
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && + if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() && !DisableARMFastISel) return new ARMFastISel(funcInfo); return 0; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 06944b1..0fd6025 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// +//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,33 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), + cl::desc("Align ARM NEON spills in prolog and epilog")); + +static MachineBasicBlock::iterator +skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs); + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetDarwin()) + // iOS requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetIOS()) return true; const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -71,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; @@ -80,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const unsigned *CSRegs) { + const uint16_t *CSRegs) { // Integer spill area is handled with "pop". if (MI->getOpcode() == ARM::LDMIA_RET || MI->getOpcode() == ARM::t2LDMIA_RET || @@ -139,6 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; + int D8SpillFI = 0; // Allocate the vararg register save area. This is not counted in NumBytes. 
if (VARegSaveSize) @@ -172,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -181,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } break; default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } } } @@ -190,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (GPRCS1Size > 0) MBBI++; // Set FP to point to the stack slot that contains the previous FP. - // For Darwin, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not Darwin, all the callee-saved registers go + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. bool HasFP = hasFP(MF); @@ -227,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI++; } - NumBytes = DPRCSOffset; + // Move past the aligned DPRCS2 area. + if (AFI->getNumAlignedDPRCS2Regs() > 0) { + MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); + // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and + // leaves the stack pointer pointing to the DPRCS2 area. + // + // Adjust NumBytes to represent the stack slots below the DPRCS2 area. + NumBytes += MFI->getObjectOffset(D8SpillFI); + } else + NumBytes = DPRCSOffset; + if (NumBytes) { // Adjust SP after all the callee-save spills. emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, @@ -254,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. - if (RegInfo->needsStackRealignment(MF)) { + // If aligned NEON registers were spilled, the stack has already been + // realigned. + if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { @@ -331,7 +359,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -355,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARMCC::AL, 0, TII); else { // It's not possible to restore SP from FP in a single instruction. - // For Darwin, this looks like: + // For iOS, this looks like: // mov sp, r7 // sub sp, #24 // This is bad, if an interrupt is taken after the mov, sp is in an @@ -471,6 +499,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, else if (AFI->isDPRCalleeSavedAreaFrame(FI)) return Offset - AFI->getDPRCalleeSavedAreaOffset(); + // SP can move around if there are allocas. We may also lose track of SP + // when emergency spilling inside a non-reserved call frame setup. 
+ bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF); + // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { @@ -478,7 +510,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!"); FrameReg = RegInfo->getBaseRegister(); @@ -490,11 +522,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (hasFP(MF) && AFI->hasStackFrame()) { // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs (and thus the SP isn't reliable as a base). - if (isFixed || (MFI->hasVarSizedObjects() && - !RegInfo->hasBasePointer(MF))) { + if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); if (AFI->isThumb2Function()) { // Try to use the frame pointer if we can, else use the base pointer @@ -541,6 +572,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -554,7 +586,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // D-registers in the aligned area DPRCS2 are NOT spilled here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; // Add the callee-saved register as live-in unless it's LR and // @llvm.returnaddress is called. If LR is returned for @@ -604,7 +640,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -622,7 +659,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // The aligned reloads from area DPRCS2 are not inserted here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { Reg = ARM::PC; @@ -676,6 +717,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, } } +/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers +/// starting from d8. Also insert stack realignment code and leave the stack +/// pointer pointing to the d8 spill slot. 
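/// Schematically, for NumAlignedDPRCS2Regs == 7 and a 16-byte aligned frame,
/// the inserted code is (a sketch; alignment qualifiers omitted):
///   sub     r4, sp, #56         @ 7 regs * 8 bytes
///   bic     r4, r4, #15         @ realign
///   mov     sp, r4
///   vst1.64 {d8-d11}, [r4]!     @ writeback advances r4 by 32
///   vst1.64 {d12,d13}, [r4]
///   vstr    d14, [r4, #16]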
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // Mark the D-register spill slots as properly aligned. Since MFI computes + // stack slot layout backwards, this can actually mean that the d-reg stack + // slot offsets can be wrong. The offset for d8 will always be correct. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned DNum = CSI[i].getReg() - ARM::D8; + if (DNum >= 8) + continue; + int FI = CSI[i].getFrameIdx(); + // The even-numbered registers will be 16-byte aligned, the odd-numbered + // registers will be 8-byte aligned. + MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16); + + // The stack slot for D8 needs to be maximally aligned because this is + // actually the point where we align the stack pointer. MachineFrameInfo + // computes all offsets relative to the incoming stack pointer which is a + // bit weird when realigning the stack. Any extra padding for this + // over-alignment is not realized because the code inserted below adjusts + // the stack pointer by numregs * 8 before aligning the stack pointer. + if (DNum == 0) + MFI.setObjectAlignment(FI, MFI.getMaxAlignment()); + } + + // Move the stack pointer to the d8 spill slot, and align it at the same + // time. Leave the stack slot address in the scratch register r4. + // + // sub r4, sp, #numregs * 8 + // bic r4, r4, #align - 1 + // mov sp, r4 + // + bool isThumb = AFI->isThumbFunction(); + assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); + AFI->setShouldRestoreSPFromFP(true); + + // sub r4, sp, #numregs * 8 + // The immediate is <= 64, so it doesn't need any special encoding. + unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); + + // bic r4, r4, #align-1 + Opc = isThumb ? ARM::t2BICri : ARM::BICri; + unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign - 1))); + + // mov sp, r4 + // The stack pointer must be adjusted before spilling anything, otherwise + // the stack slots could be clobbered by an interrupt handler. + // Leave r4 live, it is used below. + Opc = isThumb ? ARM::tMOVr : ARM::MOVr; + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) + .addReg(ARM::R4); + MIB = AddDefaultPred(MIB); + if (!isThumb) + AddDefaultCC(MIB); + + // Now spill NumAlignedDPRCS2Regs registers starting from d8. + // r4 holds the stack slot address. + unsigned NextReg = ARM::D8; + + // 16-byte aligned vst1.64 with 4 d-regs and address writeback. + // The writeback is only needed when emitting two vst1.64 instructions. 
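As a worked example of the ladder that follows, seven aligned D-registers would come out as roughly (sketch):

    // vst1.64 {d8, d9, d10, d11}, [r4:128]!  ; writeback advances r4 by 32
    // vst1.64 {d12, d13}, [r4:128]           ; no writeback needed here
    // vstr    d14, [r4, #16]                 ; vanilla store for the odd register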
+ if (NumAlignedDPRCS2Regs >= 6) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), + ARM::R4) + .addReg(ARM::R4, RegState::Kill).addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // We won't modify r4 beyond this point. It currently points to the next + // register to be spilled. + unsigned R4BaseReg = NextReg; + + // 16-byte aligned vst1.64 with 4 d-regs, no writeback. + if (NumAlignedDPRCS2Regs >= 4) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4).addImm(16).addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // 16-byte aligned vst1.64 with 2 d-regs. + if (NumAlignedDPRCS2Regs >= 2) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4).addImm(16).addReg(SupReg)); + NextReg += 2; + NumAlignedDPRCS2Regs -= 2; + } + + // Finally, use a vanilla vstr.64 for the odd last register. + if (NumAlignedDPRCS2Regs) { + MBB.addLiveIn(NextReg); + // vstr.64 uses addrmode5 which has an offset scale of 4. + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + } + + // The last spill instruction inserted should kill the scratch register r4. + llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); +} + +/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an +/// iterator to the following instruction. +static MachineBasicBlock::iterator +skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs) { + // sub r4, sp, #numregs * 8 + // bic r4, r4, #align - 1 + // mov sp, r4 + ++MI; ++MI; ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + + // These switches all fall through. + switch(NumAlignedDPRCS2Regs) { + case 7: + ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + default: + ++MI; + assert(MI->mayStore() && "Expecting spill instruction"); + case 1: + case 2: + case 4: + assert(MI->killsRegister(ARM::R4) && "Missed kill flag"); + ++MI; + } + return MI; +} + +/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers +/// starting from d8. These instructions are assumed to execute while the +/// stack is still aligned, unlike the code inserted by emitPopInst. +static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Find the frame index assigned to d8. + int D8SpillFI = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) + if (CSI[i].getReg() == ARM::D8) { + D8SpillFI = CSI[i].getFrameIdx(); + break; + } + + // Materialize the address of the d8 spill slot into the scratch register r4. + // This can be fairly complicated if the stack frame is large, so just use + // the normal frame index elimination mechanism to do it. 
This code runs as + // the initial part of the epilogue where the stack and base pointers haven't + // been changed yet. + bool isThumb = AFI->isThumbFunction(); + assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); + + unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addFrameIndex(D8SpillFI).addImm(0))); + + // Now restore NumAlignedDPRCS2Regs registers starting from d8. + unsigned NextReg = ARM::D8; + + // 16-byte aligned vld1.64 with 4 d-regs and writeback. + if (NumAlignedDPRCS2Regs >= 6) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) + .addReg(ARM::R4, RegState::Define) + .addReg(ARM::R4, RegState::Kill).addImm(16) + .addReg(SupReg, RegState::ImplicitDefine)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // We won't modify r4 beyond this point. It currently points to the next + // register to be reloaded. + unsigned R4BaseReg = NextReg; + + // 16-byte aligned vld1.64 with 4 d-regs, no writeback. + if (NumAlignedDPRCS2Regs >= 4) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) + .addReg(ARM::R4).addImm(16) + .addReg(SupReg, RegState::ImplicitDefine)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // 16-byte aligned vld1.64 with 2 d-regs. + if (NumAlignedDPRCS2Regs >= 2) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QPRRegisterClass); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) + .addReg(ARM::R4).addImm(16)); + NextReg += 2; + NumAlignedDPRCS2Regs -= 2; + } + + // Finally, use a vanilla vldr.64 for the remaining odd register. + if (NumAlignedDPRCS2Regs) + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) + .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); + + // The last reload kills r4. + llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); +} + bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -690,12 +972,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, MachineInstr::FrameSetup); emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - MachineInstr::FrameSetup); + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + + // The code above does not insert spill code for the aligned DPRCS2 registers. + // The stack realignment code will be inserted between the push instructions + // and these spills.
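Putting the pieces together, a prologue spilling d8-d11 to the aligned area would order its spill code roughly like this (sketch; the exact register lists depend on the function):

    // push  {r4, r5, r7, lr}                ; GPR spill area 1
    // push  {r8, r10, r11}                  ; GPR spill area 2
    // vpush {...}                           ; standard DPRCS area, if any regs
    //                                       ; remain above a hole
    // sub r4, sp / bic r4 / mov sp, r4      ; realignment, emitted by
    //                                       ; emitAlignedDPRCS2Spills
    // vst1.64 {d8, d9, d10, d11}, [r4:128]  ; the aligned DPRCS2 spills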
+ if (NumAlignedDPRCS2Regs) + emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); return true; } @@ -710,15 +999,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + + // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 + // registers. Do that here instead. + if (NumAlignedDPRCS2Regs) + emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; - emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea2Register); + &isARMArea2Register, 0); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea1Register); + &isARMArea1Register, 0); return true; } @@ -842,6 +1138,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, return Limit; } +// In functions that realign the stack, it can be an advantage to spill the +// callee-saved vector registers after realigning the stack. The vst1 and vld1 +// instructions take alignment hints that can improve performance. +// +static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { + MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); + if (!SpillAlignedNEONRegs) + return; + + // Naked functions don't spill callee-saved registers. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return; + + // We are planning to use NEON instructions vst1 / vld1. + if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON()) + return; + + // Don't bother if the default stack alignment is sufficiently high. + if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + return; + + // Aligned spills require stack realignment. + const ARMBaseRegisterInfo *RegInfo = + static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + if (!RegInfo->canRealignStack(MF)) + return; + + // We always spill contiguous d-registers starting from d8. Count how many + // need spilling. The register allocator will almost always use the + // callee-saved registers in order, but it can happen that there are holes in + // the range. Registers above the hole will be spilled to the standard DPRCS + // area. + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned NumSpills = 0; + for (; NumSpills < 8; ++NumSpills) + if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills)) + break; + + // Don't do this for just one d-register. It's not worth it. + if (NumSpills < 2) + return; + + // Spill the first NumSpills D-registers after realigning the stack. + MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); + + // A scratch register is required for the vst1 / vld1 instructions. + MF.getRegInfo().setPhysRegUsed(ARM::R4); +} + void ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -888,28 +1233,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MF.getRegInfo().setPhysRegUsed(ARM::R4); } + // See if we can spill vector registers to aligned stack.
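The counting loop in checkNumAlignedDPRCS2Regs above stops at the first unused D-register, so the contiguity rule can be pictured as follows (a self-contained sketch with an assumed usage pattern):

    // Assume d8, d9 and d11 are used, i.e. there is a hole at d10: the
    // result is 2, and d11 falls back to the standard DPRCS area.
    unsigned countAlignedSpills(const bool used[8]) {
      unsigned NumSpills = 0;
      for (; NumSpills < 8; ++NumSpills)
        if (!used[NumSpills])  // the first hole ends the contiguous run
          break;
      return NumSpills;
    }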
+ checkNumAlignedDPRCS2Regs(MF); + // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { + if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) { Spilled = true; CanEliminateFrame = false; - } else { - // Check alias registers too. - for (const unsigned *Aliases = - RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } } if (!ARM::GPRRegisterClass->contains(Reg)) @@ -918,7 +1257,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -938,7 +1277,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { UnspilledCS1GPRs.push_back(Reg); continue; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 61bb8af..a1c2b93 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -63,12 +63,13 @@ public: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool), + bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7473141..c99db98 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -244,6 +244,7 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. 
SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -1562,10 +1563,6 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; - case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register; - case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register; - case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register; - case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -1575,26 +1572,26 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; - case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register; - case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; - case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; - case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; - case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; - case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; - case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; + case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; + case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; - case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; - case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; - case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; + case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; + case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; + + case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; + case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; + case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1668,7 +1665,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. 
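The _fixed/_register opcode pairs in the mapping above correspond to the two NEON post-increment addressing forms (sketch):

    // vld1.8 {d0}, [r0]!     ; "fixed": the base advances by the access size
    // vld1.8 {d0}, [r0], r2  ; "register": the stride is supplied in a GPR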
- if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1818,7 +1815,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VST1q64PseudoWB_fixed) || + if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -2043,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, Ops.push_back(MemAddr); Ops.push_back(Align); if (isUpdating) { + // fixed-stride update instructions don't have an explicit writeback + // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + if (!isa<ConstantSDNode>(Inc.getNode())) + Ops.push_back(Inc); + // FIXME: VLD3 and VLD4 haven't been updated to that form yet. + else if (NumVecs > 2) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2182,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; default: llvm_unreachable("Unknown so_reg opcode!"); - break; } SDValue SOShImm = CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); @@ -2293,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) // Pattern complexity = 18 cost = 1 size = 0 - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; if (Subtarget->isThumb()) { SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); @@ -2352,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; + default: llvm_unreachable("Illegal conditional move type!"); case MVT::i32: Opc = Subtarget->isThumb() ? (Subtarget->hasThumb2() ? 
ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) @@ -2369,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + SDValue CCR = N->getOperand(3); + assert(CCR.getOpcode() == ISD::Register); + SDValue InFlag = N->getOperand(4); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if (Subtarget->isThumb()) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_t2_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; + case ARMISD::COR: Opc = ARM::t2ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; + case ARMISD::COR: Opc = ARM::ORRCCrsi; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; + case ARMISD::COR: Opc = ARM::ORRCCrsr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCri; break; + case ARMISD::COR: Opc = ARM::ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, 
Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrr; break; + case ARMISD::COR: Opc = ARM::ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); +} + /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2706,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); + case ARMISD::CAND: + case ARMISD::COR: + case ARMISD::CXOR: + return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2798,8 +2909,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, - ARM::VLD2DUPd32Pseudo_UPD }; + unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, + ARM::VLD2DUPd16PseudoWB_fixed, + ARM::VLD2DUPd32PseudoWB_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } @@ -2818,18 +2930,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD1_UPD: { unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed, - ARM::VLD1q16PseudoWB_fixed, - ARM::VLD1q32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed, + ARM::VLD1q16wb_fixed, + ARM::VLD1q32wb_fixed, + ARM::VLD1q64wb_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, - ARM::VLD2d16PseudoWB_fixed, - ARM::VLD2d32PseudoWB_fixed, - ARM::VLD1q64PseudoWB_fixed}; + unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed, + ARM::VLD2d16wb_fixed, + ARM::VLD2d32wb_fixed, + ARM::VLD1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; @@ -2838,7 +2950,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD3_UPD: { unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2850,7 +2962,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD4_UPD: { unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; + ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; @@ -2887,18 +2999,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST1_UPD: { unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed, - ARM::VST1q16PseudoWB_fixed, - ARM::VST1q32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed }; + unsigned QOpcodes[] = { ARM::VST1q8wb_fixed, + ARM::VST1q16wb_fixed, + ARM::VST1q32wb_fixed, + ARM::VST1q64wb_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, - ARM::VST2d16PseudoWB_fixed, - ARM::VST2d32PseudoWB_fixed, - ARM::VST1q64PseudoWB_fixed}; + 
unsigned DOpcodes[] = { ARM::VST2d8wb_fixed, + ARM::VST2d16wb_fixed, + ARM::VST2d32wb_fixed, + ARM::VST1q64wb_fixed}; unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; @@ -3068,14 +3180,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, - ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64}; return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, - ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD1q64 }; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3129,14 +3241,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, - ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, - ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3197,14 +3309,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); + return SelectVTBL(N, false, 2, ARM::VTBL2); case Intrinsic::arm_neon_vtbl3: return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); + return SelectVTBL(N, true, 2, ARM::VTBX2); case Intrinsic::arm_neon_vtbx3: return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: @@ -3238,7 +3350,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops.data(), Ops.size()); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c6c1f5b..477b5f4 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -43,7 +43,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" @@ -262,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic 
helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -387,8 +386,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Long long helper functions // RTABI chapter 4.2, Table 9 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); @@ -404,21 +401,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); // Memory operations // RTABI chapter 4.3.4 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); + setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); } // Use divmod compiler-rt calls for iOS 5.0 and later. @@ -529,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v1i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with - // a destination type that is wider than the source. + // a destination type that is wider than the source, and nor does + // it have a FP_TO_[SU]INT instruction with a narrower destination than + // source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -551,7 +559,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); + // It is legal to extload from v4i8 to v4i16 or v4i32. 
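For example, a widening vector load no longer has to be scalarized; a sign-extending load of four bytes can stay a single load (a rough sketch; the exact machine code depends on the subtarget):

    // %v = load <4 x i8>* %p              ; one 32-bit load
    // %w = sext <4 x i8> %v to <4 x i16>  ; folds into the load, roughly:
    // ;   vld1.32 {d0[0]}, [r0]
    // ;   vmovl.s8 q0, d0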
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16, + MVT::v2i32}; + for (unsigned i = 0; i < 6; ++i) { + setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + } } computeRegisterProperties(); @@ -643,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + + if (!Subtarget->isTargetDarwin()) { + // Non-Darwin platforms may return values in these registers via the + // personality function. + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use @@ -773,10 +794,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) - setTargetDAGCombine(ISD::OR); - if (Subtarget->hasNEON()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget->hasV6Ops()) + setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); @@ -869,7 +894,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CAND: return "ARMISD::CAND"; + case ARMISD::COR: return "ARMISD::COR"; + case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -990,7 +1019,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1128,7 +1157,9 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + if (!isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + // Fallthrough case CallingConv::ARM_AAPCS: return (Return ? 
RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: @@ -1255,7 +1286,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -1551,12 +1582,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; + else if (doesNotRet && isDirect && !isARMFunc && + Subtarget->hasRAS() && !Subtarget->isThumb1Only()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - CallOpc = (isDirect || Subtarget->hasV5TOps()) - ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) - : ARMISD::CALL_NOLINK; + if (!isDirect && !Subtarget->hasV5TOps()) { + CallOpc = ARMISD::CALL_NOLINK; + } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector<SDValue> Ops; @@ -1569,6 +1608,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1897,7 +1942,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return false; unsigned NumCopies = 0; - SDNode* Copies[2]; + SDNode* Copies[2] = { 0, 0 }; SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::CopyToReg) { Copies[NumCopies++] = Use; @@ -1932,7 +1977,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { UI != UE; ++UI) { if (UI->getOpcode() == ISD::CopyToReg) { SDNode *Use = *UI; - if (Use == Copies[0] || Use == Copies[1]) + if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) continue; return false; } @@ -2043,7 +2088,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/true, + 0, CallingConv::C, /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -2167,7 +2213,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - // FIXME: Enable this for static codegen when tool issues are fixed. + // FIXME: Enable this for static codegen when tool issues are fixed. Also + // update ARMFastISel::ARMMaterializeGV. 
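The doesNotRet handling above trades the usual bl for a plain branch so that a call which never returns does not push a useless entry onto the return address stack (sketch, with a hypothetical no-return callee):

    // bl   _abort   ; pushes a return address that will never be popped
    // ; becomes, on cores with FeatureHasRAS:
    // mov  lr, pc   ; LR still gets a plausible value
    // b    _abort   ; a plain branch leaves the return stack balanced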
if (Subtarget->useMovt() && RelocM != Reloc::Static) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2398,7 +2445,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2484,7 +2531,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SmallVector<SDValue, 4> MemOps; for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2567,7 +2614,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (RegVT == MVT::f32) RC = ARM::SPRRegisterClass; @@ -2809,6 +2856,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the + // undefined bits before doing a full-word comparison with zero. + Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); @@ -2926,12 +2978,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - bool SeenZero = false; - if (canChangeToInt(LHS, SeenZero, Subtarget) && - canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case since - // we only care about equality comparisons. - (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { + bool LHSSeenZero = false; + bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); + bool RHSSeenZero = false; + bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); + if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
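A worked example of why the 0x7fffffff masking added below is needed even for equality: +0.0 and -0.0 compare equal as floats but differ in the sign bit.

    // bits(+0.0f) == 0x00000000, bits(-0.0f) == 0x80000000
    // (0x80000000 & 0x7fffffff) == (0x00000000 & 0x7fffffff)  // both 0: equal
    // An unmasked integer compare of the raw bits would report -0.0 != +0.0.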
@@ -2940,10 +2991,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { else if (CC == ISD::SETUNE) CC = ISD::SETNE; + SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { - LHS = bitcastf32Toi32(LHS, DAG); - RHS = bitcastf32Toi32(RHS, DAG); + LHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(LHS, DAG), Mask); + RHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, @@ -2954,6 +3008,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); + LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); + RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); @@ -3047,11 +3103,21 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering"); + DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); + if (Op.getValueType().getVectorElementType() == MVT::i32) { + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + + assert(Op.getOperand(0).getValueType() == MVT::v4f32 && + "Invalid type for custom lowering!"); + if (VT != MVT::v4i16) + return DAG.UnrollVectorOp(Op.getNode()); + + Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -3063,8 +3129,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -3094,8 +3159,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; @@ -3119,8 +3183,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3494,7 +3557,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { - default: llvm_unreachable("Illegal FP comparison"); break; + default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; // Fallthrough case ISD::SETOEQ: @@ -3533,7 +3596,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } else { // Integer comparisons. 
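NEON has no compare-not-equal, which is what the Invert flag just below encodes: SETNE selects as VCEQ followed by a complement (sketch):

    // vceq.i32 q0, q1, q2  ; lanes become all-ones where equal
    // vmvn     q0, q0      ; invert: all-ones where not equal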
switch (SetCCOpcode) { - default: llvm_unreachable("Illegal integer comparison"); break; + default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; @@ -3740,14 +3803,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, default: llvm_unreachable("unexpected size for isNEONModifiedImm"); - return SDValue(); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, MVT::i32); } -static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, +static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; @@ -3786,8 +3848,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) -static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned BlockSize) { +static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); @@ -3813,15 +3874,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { +static bool isVTBLMask(ArrayRef<int> M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } -static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3839,8 +3899,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. -static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3855,8 +3914,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3879,8 +3937,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>. -static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3904,8 +3961,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3930,8 +3986,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -4363,7 +4418,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, - SmallVectorImpl<int> &ShuffleMask, + ArrayRef<int> ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); @@ -4371,7 +4426,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, DebugLoc DL = Op.getDebugLoc(); SmallVector<SDValue, 8> VTBLMask; - for (SmallVectorImpl<int>::iterator + for (ArrayRef<int>::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); @@ -4391,7 +4446,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - SmallVector<int, 8> ShuffleMask; // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again // during code selection. This is more efficient and avoids the possibility // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same type so that they get CSEd properly.
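For reference, the mask shapes these predicates recognize, shown for a 4-element shuffle of two vectors (first and second result respectively):

    // VTRN: <0, 4, 2, 6> / <1, 5, 3, 7>  ; transpose 2x2 element blocks
    // VUZP: <0, 2, 4, 6> / <1, 3, 5, 7>  ; unzip even / odd lanes
    // VZIP: <0, 4, 1, 5> / <2, 6, 3, 7>  ; interleave low / high halves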
- SVN->getMask(ShuffleMask); + ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize <= 32) { @@ -4959,7 +5013,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { - default: assert(0 && "Invalid code"); + default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; @@ -5071,7 +5125,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } - return SDValue(); } /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -5083,7 +5136,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -5279,7 +5331,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5389,7 +5441,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5499,7 +5551,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned storesuccess = MRI.createVirtualRegister(TRC); @@ -5792,7 +5844,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); - BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup)); + if (AFI->isThumb1OnlyFunction()) + BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); + else if (!Subtarget->hasVFP2()) + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); + else + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { @@ -6014,7 +6071,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // N.B. the order the invoke BBs are processed in doesn't matter here. 
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6666,7 +6723,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: - assert(0 && "Invalid vector element type for padd optimization."); + llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), @@ -6818,8 +6875,52 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { + if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) + return false; + + SDValue FalseVal = N.getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); + if (!C) + return false; + if (AllOnes) + return C->isAllOnesValue(); + return C->isNullValue(); +} + +/// formConditionalOp - Combine an operation with a conditional move operand +/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) +/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) +static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, + bool Commutable) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + bool isAND = N->getOpcode() == ISD::AND; + bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); + if (!isCand && Commutable) { + isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); + if (isCand) + std::swap(N0, N1); + } + if (!isCand) + return SDValue(); + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ISD::AND: Opc = ARMISD::CAND; break; + case ISD::OR: Opc = ARMISD::COR; break; + case ISD::XOR: Opc = ARMISD::CXOR; break; + } + return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, + N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), + N1.getOperand(4)); +} + static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); @@ -6850,6 +6951,13 @@ static SDValue PerformANDCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (and x, (cmov -1, y, cond)) => (and.cond x, y) + SDValue CAND = formConditionalOp(N, DAG, true); + if (CAND.getNode()) + return CAND; + } + return SDValue(); } @@ -6886,6 +6994,13 @@ static SDValue PerformORCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (or x, (cmov 0, y, cond)) => (or.cond x, y) + SDValue COR = formConditionalOp(N, DAG, true); + if (COR.getNode()) + return COR; + } + SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND) return SDValue(); @@ -7034,6 +7149,25 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } +static SDValue PerformXORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + if (!Subtarget->isThumb1Only()) { + // (xor x, 
(cmov 0, y, cond)) => (xor.cond x, y) + SDValue CXOR = formConditionalOp(N, DAG, true); + if (CXOR.getNode()) + return CXOR; + } + + return SDValue(); +} + /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff /// the bits being cleared by the AND are not demanded by the BFI. static SDValue PerformBFICombine(SDNode *N, @@ -7331,7 +7465,7 @@ static SDValue CombineBaseUpdate(SDNode *N, if (isIntrinsic) { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: assert(0 && "unexpected intrinsic for Neon base update"); + default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; @@ -7364,7 +7498,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } else { isLaneOp = true; switch (N->getOpcode()) { - default: assert(0 && "unexpected opcode for Neon base update"); + default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -7857,6 +7991,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { + // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high + // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. + SDValue N1 = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + SDValue N0 = N->getOperand(0); + if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && + DAG.MaskedValueIsZero(N0.getOperand(0), + APInt::getHighBitsSet(32, 16))) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + } + } // Nothing to be done for scalar shifts. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8085,7 +8231,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); - case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -8377,7 +8524,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (Scale & 1) return false; return isPowerOf2_32(Scale); } - break; } return true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index b8dc4bf..7f12293 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -56,7 +56,11 @@ namespace llvm { CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. + CMOV, // ARM conditional move instructions. + CAND, // ARM conditional and instructions. + COR, // ARM conditional or instructions. + CXOR, // ARM conditional xor instructions. 
BCC_i64, @@ -345,7 +349,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. @@ -458,7 +462,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 80f3773..1d38bcf 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1,4 +1,4 @@ -//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=// +//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -290,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, class Encoding { field bits<32> Inst; + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; } class InstARM<AddrMode am, int sz, IndexMode im, @@ -1594,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + bits<5> fbits; // size (fixed-point number): sx == 0 ? 16 : 32 let Inst{7} = op5; // sx + let Inst{5} = fbits{0}; + let Inst{3-0} = fbits{4-1}; } // VFP conversion instructions, if no NEON @@ -2019,6 +2030,15 @@ multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; } +multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> { + let Predicates = [HasNEON] in { + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} +} + // The same alias classes using AsmPseudo instead, for the more complex // stuff in NEON that InstAlias can't quite handle. // Note that we can't use anonymous defm references here like we can @@ -2026,75 +2046,6 @@ multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { // for instalias defs. 
class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; -multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>; - def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>; - def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>; - def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>; -} -// NEONDT8ReqAsmPseudoInst plus plain ".8" -multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> { - def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>; - defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>; - def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>; - def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>; - def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>; -} -// NEONDT16ReqAsmPseudoInst plus plain ".16" -multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> { - def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>; - defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>; - def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>; - def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>; - def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>; - def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>; -} -// NEONDT32ReqAsmPseudoInst plus plain ".32" -multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> { - def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>; - defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; - def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; - def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; - def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>; - def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> { - def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; - defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> { - def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; - def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; - def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; - def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; -} -// NEONDT64ReqAsmPseudoInst plus plain ".64" -multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> { - def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; - defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> { - defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>; -} -multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> { - defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; - defm _ : 
NEONDT32AsmPseudoInst<opc, asm, iops>; - defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>; -} // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. def : TokenAlias<".s8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 48da03f..b8f607e 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,29 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + if (hasNOP()) { + NopInst.setOpcode(ARM::NOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } else { + NopInst.setOpcode(ARM::MOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } +} + unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index f2c7bdc..7bedf30 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 516a080..0b1406e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -179,8 +179,14 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, + AssemblerPredicate<"FeatureVFP4">; +def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; +def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, + AssemblerPredicate<"FeatureNEON2">; +def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -206,8 +212,8 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; +def IsIOS : Predicate<"Subtarget->isTargetIOS()">; +def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; // FIXME: Eventually this will be just "hasV6T2Ops". @@ -343,13 +349,11 @@ def bltarget : Operand<i32> { // Call target for ARM. Handles conditional/unconditional // FIXME: rename bl_target to t2_bltarget? def bl_target : Operand<i32> { - // Encoded the same as branch targets. - let EncoderMethod = "getARMBranchTargetOpValue"; + let EncoderMethod = "getARMBLTargetOpValue"; let OperandType = "OPERAND_PCREL"; } def blx_target : Operand<i32> { - // Encoded the same as branch targets. let EncoderMethod = "getARMBLXTargetOpValue"; let OperandType = "OPERAND_PCREL"; } @@ -760,7 +764,7 @@ def am2offset_reg : Operand<i32>, let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } // FIXME: am2offset_imm should only need the immediate, not the GPR. Having @@ -772,7 +776,7 @@ def am2offset_imm : Operand<i32>, let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -1892,20 +1896,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { } } -// All calls clobber the non-callee saved registers. SP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. +// SP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. 
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM, IsNotIOS]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1915,7 +1916,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM, IsNotIOS]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1925,7 +1926,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T, IsNotIOS]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1934,7 +1935,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T, IsNotIOS]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1944,55 +1945,67 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotDarwin]>; + Requires<[IsARM, HasV4T, IsNotIOS]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotDarwin]>; + Requires<[IsARM, NoV4T, IsNotIOS]>; + + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def BMOVPCB_CALL : ARMPseudoInst<(outs), + (ins bl_target:$func, variable_ops), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM, IsNotIOS]>; } let isCall = 1, - // On Darwin R9 is call-clobbered. + // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), 4, IIC_Br, [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; def BLr9_pred : ARMPseudoExpand<(outs), (ins bl_target:$func, pred:$p, variable_ops), 4, IIC_Br, [(ARMcall_pred tglobaladdr:$func)], (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; // ARMv5T and above def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), 4, IIC_Br, [(ARMcall GPR:$func)], (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsDarwin]>; + Requires<[IsARM, HasV5T, IsIOS]>; def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), 4, IIC_Br, [(ARMcall_pred GPR:$func)], (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsDarwin]>; + Requires<[IsARM, HasV5T, IsIOS]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. 
def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsDarwin]>; + Requires<[IsARM, HasV4T, IsIOS]>; // ARMv4 def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsDarwin]>; + Requires<[IsARM, NoV4T, IsIOS]>; + + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def BMOVPCBr9_CALL : ARMPseudoInst<(outs),(ins bl_target:$func, variable_ops), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM, IsIOS]>; } let isBranch = 1, isTerminator = 1 in { @@ -2060,45 +2073,43 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // IOS versions. + let Uses = [SP] in { def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; + IIC_Br, []>, Requires<[IsIOS]>; def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; + IIC_Br, []>, Requires<[IsIOS]>; def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (BX GPR:$dst)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; } - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // Non-IOS versions (the difference is R9). 
+ let Uses = [SP] in { def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; + IIC_Br, []>, Requires<[IsNotIOS]>; def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; + IIC_Br, []>, Requires<[IsNotIOS]>; def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (BX GPR:$dst)>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; } } @@ -4072,6 +4083,73 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + +let isCodeGenOnly = 1 in { +// Conditional instructions +multiclass AsI1_bincc_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + iii, opc, "\t$Rd, $Rn, $imm", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + iir, opc, "\t$Rd, $Rn, $Rm", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + + def rsi : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, + iis, opc, "\t$Rd, $Rn, $shift", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + } + + def rsr : AsI1<opcod, (outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, + iis, opc, "\t$Rd, $Rn, $shift", []>, + RegConstraint<"$Rn = $Rd"> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + } +} // AsI1_bincc_irs + +defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; + +} // isCodeGenOnly } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -4152,10 +4230,10 @@ let usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; @@ -4182,10 +4260,10 @@ let 
usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; @@ -4212,10 +4290,10 @@ let usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, @@ -4261,14 +4339,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +} + def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, Requires<[IsARM, HasV7]> { @@ -4711,8 +4789,8 @@ let isCall = 1, // no encoding information is necessary. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1, - usesCustomInserter = 1 in { + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, @@ -4721,28 +4799,37 @@ let Defs = let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1 in { + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } -// FIXME: Non-Darwin version(s) +// FIXME: Non-IOS version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Defs = [ R7, LR, SP ] in { def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), NoItinerary, [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; } -// eh.sjlj.dispatchsetup pseudo-instruction. -// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are +// eh.sjlj.dispatchsetup pseudo-instructions. +// These pseudos are used for both ARM and Thumb2. 
Any differences are // handled when the pseudo is expanded (which happens before any passes // that need the instruction size). -let isBarrier = 1 in -def eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + isBarrier = 1 in +def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], + isBarrier = 1 in +def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -4801,28 +4888,34 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Tail calls def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + (TCRETURNri tcGPR:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotIOS]>; def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotIOS]>; def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsDarwin]>; + Requires<[IsARM, IsIOS]>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCB_CALL texternalsym:$func)>, + Requires<[IsARM, IsNotIOS]>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCBr9_CALL texternalsym:$func)>, + Requires<[IsARM, IsIOS]>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; @@ -5158,3 +5251,7 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", // "neg" is and alias for "rsb rd, rn, #0" def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// 'it' blocks in ARM mode just validate the predicates. The IT itself +// is discarded. 
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c40860d..8684ce1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1,4 +1,4 @@ -//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -39,6 +39,11 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } +def nImmVMOVI32Neg : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32NegAsmOperand; +} def nImmVMOVF32 : Operand<i32> { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; @@ -84,13 +89,13 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; } // Register list of two sequential D registers. -def VecListTwoDAsmOperand : AsmOperandClass { - let Name = "VecListTwoD"; +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { - let ParserMatchClass = VecListTwoDAsmOperand; +def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> { + let ParserMatchClass = VecListDPairAsmOperand; } // Register list of three sequential D registers. def VecListThreeDAsmOperand : AsmOperandClass { @@ -111,13 +116,31 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { let ParserMatchClass = VecListFourDAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). -def VecListTwoQAsmOperand : AsmOperandClass { - let Name = "VecListTwoQ"; +def VecListDPairSpacedAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpaced"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> { + let ParserMatchClass = VecListDPairSpacedAsmOperand; +} +// Register list of three D registers spaced by 2 (three Q registers). +def VecListThreeQAsmOperand : AsmOperandClass { + let Name = "VecListThreeQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> { + let ParserMatchClass = VecListThreeQAsmOperand; +} +// Register list of four D registers spaced by 2 (four Q registers). +def VecListFourQAsmOperand : AsmOperandClass { + let Name = "VecListFourQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { - let ParserMatchClass = VecListTwoQAsmOperand; +def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> { + let ParserMatchClass = VecListFourQAsmOperand; } // Register list of one D register, with "all lanes" subscripting. @@ -138,6 +161,56 @@ def VecListTwoDAllLanesAsmOperand : AsmOperandClass { def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { let ParserMatchClass = VecListTwoDAllLanesAsmOperand; } +// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListTwoQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoQAllLanes : RegisterOperand<DPR, + "printVectorListTwoSpacedAllLanes"> { + let ParserMatchClass = VecListTwoQAllLanesAsmOperand; +} +// Register list of three D registers, with "all lanes" subscripting. +def VecListThreeDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeDAllLanes : RegisterOperand<DPR, + "printVectorListThreeAllLanes"> { + let ParserMatchClass = VecListThreeDAllLanesAsmOperand; +} +// Register list of three D registers spaced by 2 (three sequential Q regs). +def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand<DPR, + "printVectorListThreeSpacedAllLanes"> { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand<DPR, + "printVectorListFourSpacedAllLanes"> { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} + // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { @@ -169,7 +242,8 @@ def VecListOneDWordIndexed : Operand<i32> { let ParserMatchClass = VecListOneDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } -// Register list of two D registers, with byte lane subscripting. + +// Register list of two D registers with byte lane subscripting. def VecListTwoDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDByteIndexed"; let ParserMethod = "parseVectorList"; @@ -199,6 +273,130 @@ def VecListTwoDWordIndexed : Operand<i32> { let ParserMatchClass = VecListTwoDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +// Register list of two Q registers with half-word lane subscripting. +def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + +// Register list of three D registers with byte lane subscripting. 
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of four D registers with byte lane subscripting. +def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. 
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -395,7 +593,7 @@ class VLD1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; @@ -413,11 +611,6 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; - // ...with address register writeback: multiclass VLD1DWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), @@ -439,7 +632,7 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { } } multiclass VLD1QWB<bits<4> op7_4, string Dt> { - def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -448,7 +641,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } - def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -467,15 +660,6 @@ defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; -def VLD1q8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; -def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; - // ...with 3 registers class VLD1D3<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), @@ -569,18 +753,14 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; +def 
VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; -def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; - def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; @@ -607,21 +787,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; -def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; - def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; @@ -630,12 +803,12 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -819,12 +992,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { } def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { let Inst{7} = lane{0}; - let Inst{5} = Rn{4}; - let Inst{4} 
= Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; @@ -994,7 +1166,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; @@ -1005,7 +1177,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; @@ -1020,7 +1192,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; } @@ -1031,7 +1203,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1044,7 +1216,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1072,7 +1244,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1084,7 +1256,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -1197,48 +1369,65 @@ def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), (ins addrmode6dup:$Rn), IIC_VLD2dup, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; // ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">; +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; // ...with address register writeback: -class VLD2DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins 
addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; +multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; -def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; -def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; -def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">; -def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">; -def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; -def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; +def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; +def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> @@ -1259,9 +1448,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB<bits<4> op7_4, string Dt> @@ -1277,9 +1466,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -1305,9 +1494,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>; def VLD4DUPd32Pseudo : 
VLDQQPseudo<IIC_VLD4dup>; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB<bits<4> op7_4, string Dt> @@ -1324,9 +1513,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; @@ -1358,6 +1547,15 @@ class VSTQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, "$addr.addr = $wb">; +class VSTQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; + class VSTQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; class VSTQQQQWBPseudo<InstrItinClass itin> @@ -1374,7 +1572,7 @@ class VST1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; @@ -1391,11 +1589,6 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>; - // ...with address register writeback: multiclass VST1DWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), @@ -1419,7 +1612,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { } multiclass VST1QWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1428,7 +1621,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1448,15 +1641,6 @@ defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; -def VST1q8PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; - // ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), @@ -1556,18 +1740,14 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; -def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d32Pseudo : VSTQPseudo<IIC_VST2>; - def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; @@ -1614,35 +1794,28 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; -def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; - -def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def 
VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -3837,10 +4010,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -3895,10 +4068,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, NoNEON2]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -3947,6 +4120,24 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + +// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. +def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON2,FPContractions]>; + +def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON2,FPContractions]>; + +// Fused Vector Multiply Subtract (floating-point) +def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON2,FPContractions]>; +def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON2,FPContractions]>; + // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) @@ -4628,11 +4819,13 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), + NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), + NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + []>; // Vector Move Operations. @@ -4964,6 +5157,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector Conversions. @@ -5198,7 +5394,7 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), @@ -5211,8 +5407,6 @@ def VTBL4 "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -5228,7 +5422,7 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), @@ -5243,9 +5437,6 @@ def VTBX4 "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; @@ -5295,9 +5486,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; +def : N3VSMulOpPat<fmul, fadd, VFMAfd>, + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; +def : N3VSMulOpPat<fmul, fsub, VFMSfd>, + Requires<[HasNEON2, UseNEONForFP,FPContractions]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5374,6 +5569,117 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. 
Example: +// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)) +// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>; +multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. Example: +// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> = +// Pat<(v4i16 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)>; +multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, + string InsnLanes, string InsnTy> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)>; +} + +// extload, zextload and sextload for a lengthening load followed by another +// lengthening load, to quadruple the initial length.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = +// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), +// (VLDRS addrmode5:$addr), +// ssub_0)), +// dsub_0)), +// qsub_0)>; +multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty, SubRegIndex RegType> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), + ssub_0)), dsub_0)), + RegType)>; +} + +defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64 + +defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 +defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 +defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 + +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>; +// v2i8 -> v2i16 -> v2i32 +defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>; +// v2i16 -> v2i32 -> v2i64 +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; + +// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 +def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases @@ -5448,21 +5754,21 @@ def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
-defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... two-operand aliases def : NEONInstAlias<"vand${p} $Vdn, $Vm", @@ -5482,17 +5788,17 @@ def : NEONInstAlias<"vorr${p} $Vdn, $Vm", def : NEONInstAlias<"vorr${p} $Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; // VMUL two-operand aliases. @@ -5668,122 +5974,724 @@ def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
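The store pseudos mirror the loads above, e.g. (operands illustrative):

    vst1.32 {d1[1]}, [r0]       @ single-lane store, no writeback
    vst1.32 {d1[1]}, [r0]!      @ with fixed writeback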
-defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", +def VST1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", (ins VecListOneDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST1LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", +def VST1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", (ins VecListOneDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
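Note the newly added VLD2LNq* pseudos: they match the double-spaced register lists, which the architecture only provides for 16- and 32-bit element sizes (hence no _8 q variant). A sketch with illustrative operands:

    vld2.8  {d0[1], d1[1]}, [r3]    @ adjacent d-register pair
    vld2.16 {d0[1], d2[1]}, [r3]    @ double-spaced pair (q-list form)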
-defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VLD2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", +def VLD2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -defm VST2LNdWB_register_Asm : - NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
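A sketch of the all-lanes (duplicating) forms handled here, with illustrative operands:

    vld3.8 {d0[], d1[], d2[]}, [r1]     @ consecutive registers
    vld3.8 {d0[], d2[], d4[]}, [r1]!    @ double-spaced registers, fixed writeback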
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
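Illustrative spellings of the multiple-structure forms these pseudos cover (operands arbitrary):

    vld3.16 {d0, d1, d2}, [r4]        @ consecutive list (VecListThreeD)
    vld3.16 {d0, d2, d4}, [r4], r5    @ spaced list (VecListThreeQ), register writeback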
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
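As with VLD3 above, both register spacings are handled; operands here are illustrative:

    vld4.8 {d0[], d1[], d2[], d3[]}, [r1]     @ consecutive registers
    vld4.8 {d0[], d2[], d4[], d6[]}, [r1]!    @ double-spaced, fixed writeback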
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
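A sketch of the lane forms; as for VLD2 and VLD3, the double-spaced (q-list) variants exist only for 16- and 32-bit elements. Operands are illustrative:

    vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r1]        @ consecutive
    vld4.16 {d0[2], d2[2], d4[2], d6[2]}, [r1], r2    @ double-spaced, register writeback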
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + + +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
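Illustrative multiple-structure spellings (operands arbitrary):

    vld4.32 {d0, d1, d2, d3}, [r1]!    @ consecutive list (VecListFourD)
    vld4.32 {d0, d2, d4, d6}, [r1]     @ spaced list (VecListFourQ)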
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VST4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VST4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; // VMOV takes an optional datatype suffix -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", +defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. @@ -5878,6 +6786,185 @@ def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// Two-operand variants for VMAX. 
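These aliases tie the destination to the first source operand; for example (registers arbitrary):

    vmax.s8 d0, d1    @ expands to: vmax.s8 d0, d0, d1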
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VMIN. +def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VPADD. +def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm", + (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm", + (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm", + (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", + (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +// Two-operand variants for VSRA. + // Signed. 
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSRI. +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSLI. 
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// VSWP allows, but does not require, a type suffix. +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; + +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// "vmov Rd, #-imm" can be handled via "vmvn". +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; + // 'gas' compatibility aliases for quad-word instructions. Strictly speaking, // these should restrict to just the Q register variants, but the register // classes are enough to match correctly regardless, so we keep it simple @@ -5911,3 +6998,18 @@ def : NEONMnemonicAlias<"vcvtq", "vcvt">; def : NEONMnemonicAlias<"vcleq", "vcle">; def : NEONMnemonicAlias<"vceqq", "vceq">; + +def : NEONMnemonicAlias<"vzipq", "vzip">; +def : NEONMnemonicAlias<"vswpq", "vswp">; + +def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; +def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; + + +// Alias for loading floating point immediates that aren't representable +// using the vmov.f32 encoding but the bitpattern is representable using +// the .i32 encoding. 
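Two related immediate tricks close out these aliases. The vmov/vmvn pair just above works because vmvn materializes the bitwise complement of its immediate, so "vmov.i32 d0, #-2" can be encoded as "vmvn.i32 d0, #1". The aliases below cover floating-point constants whose value falls outside the 8-bit vmov.f32 immediate encoding but whose raw IEEE-754 bit pattern happens to be a legal vmov.i32 immediate. A hedged illustration of the bit-pattern view (standard C++, not the patch's code; the nImmVMOVI32 operand class performs the real legality check):

#include <cstdint>
#include <cstring>

// Returns the raw IEEE-754 bits of a float. If those bits form a valid
// "vmov.i32" immediate, the constant can still be materialized even though
// it is not encodable as a "vmov.f32" immediate. Example: 2^-127 has the
// bit pattern 0x00400000, a single non-zero byte, which .i32 can encode.
uint32_t f32Bits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // pre-C++20 stand-in for bit_cast
  return Bits;
}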
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ac1a229..ba1791b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===// +//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -387,6 +387,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { bits<4> Rm; let Inst{6-3} = Rm; let Inst{2-0} = 0b000; + let Unpredictable{2-0} = 0b111; } } @@ -404,15 +405,14 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. - Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + // On non-IOS platforms R9 is callee-saved. + Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotDarwin]> { + Requires<[IsThumb, IsNotIOS]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +426,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]> { + Requires<[IsThumb, HasV5T, IsNotIOS]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +439,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>, + Requires<[IsThumb, HasV5T, IsNotIOS]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,38 +450,37 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsNotIOS]>; } let isCall = 1, - // On Darwin R9 is call-clobbered. + // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. 
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { + Defs = [LR], Uses = [R7, SP] in { // Also used for Thumb2 def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), 4, IIC_Br, [(ARMcall tglobaladdr:$func)], (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // Also used for Thumb2 def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), 2, IIC_Br, [(ARMtcall GPR:$func)], (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // ARMv4T def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsIOS]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -523,28 +522,26 @@ let isBranch = 1, isTerminator = 1 in // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls - // on Darwin), so it's in ARMInstrThumb2.td. + // IOS versions. + let Uses = [SP] in { + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; } - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // Non-IOS versions (the difference is R9). + let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; } } @@ -652,7 +649,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, } // Load tconstpool -// FIXME: Use ldr.n to work around a Darwin assembler bug. +// FIXME: Use ldr.n to work around a darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", ".n\t$Rt, $addr", @@ -666,10 +663,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, } // FIXME: Remove this entry when the above ldr.n workaround is fixed. -// For disassembly use only. -def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [/* disassembly only */]>, +// For assembly/disassembly use only. 
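A pattern worth calling out across these Thumb call and tail-call changes: the exhaustive clobber lists (R0-R3, R9, R12, the QQQQ super-registers, CPSR, FPSCR) shrink to just LR, or to nothing for the tail calls. The call-clobbered set is instead carried by a register-mask operand on the call; the MO_RegisterMask case added to ARMMCInstLower.cpp further down in this patch ("Ignore call clobbers") is the lowering-side half of that move. A sketch of how such a mask answers preservation queries, assuming the convention that a set bit means the register survives the call (illustrative, not the actual LLVM API):

#include <cstdint>

// A register mask is an array of 32-bit words, one bit per physical
// register: set = preserved across the call, clear = clobbered.
bool isPreservedAcrossCall(const uint32_t *Mask, unsigned PhysReg) {
  return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1u;
}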
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", []>, T1Encoding<{0,1,0,0,1,?}> { // A6.2 & A8.6.59 bits<3> Rt; @@ -1262,14 +1258,18 @@ def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "","", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; -// FIXME: Non-Darwin version(s) +// FIXME: Non-IOS version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, Defs = [ R7, LR, SP ] in def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; + +let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ], + isBarrier = 1 in +def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1307,20 +1307,20 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Direct calls def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsIOS]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotIOS]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotIOS]>; def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsIOS]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 981592c..e8984e1 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -136,6 +136,12 @@ def t2ldrlabel : Operand<i32> { let PrintMethod = "printT2LdrLabelOperand"; } +def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";} +def t2ldr_pcrel_imm12 : Operand<i32> { + let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand; + // used for assembler pseudo instruction and maps to t2ldrlabel, so + // doesn't need encoder or print methods of its own. +} // ADR instruction labels. def t2adrlabel : Operand<i32> { @@ -552,6 +558,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, string baseOpc, bit Commutable = 0> : T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + // Assembler aliases w/ the ".w" suffix. + def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. 
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, @@ -563,6 +574,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, cc_out:$s)>; // and with the optional destination operand, too. + def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + t2_so_imm:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, @@ -940,7 +955,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: Is the pci variant actually needed? + // pci variant is very similar to i12, but supports negative offsets + // from the PC. def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { @@ -2936,6 +2952,44 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; + +multiclass T2I_bincc_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + // shifted imm + def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + iii, opc, ".w\t$Rd, $Rn, $imm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = opcod; + let Inst{15} = 0; + } + // register + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + iir, opc, ".w\t$Rd, $Rn, $Rm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + let Inst{14-12} = 0b000; // imm3 + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>, + RegConstraint<"$Rn = $Rd"> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = opcod; + } +} // T2I_bincc_irs + +defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; + } // isCodeGenOnly = 1 } // neverHasSideEffects @@ -3058,9 +3112,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, let Inst{11-8} = Rd; let Inst{7-0} = addr{7-0}; } -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, @@ -3069,6 +3121,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } +} def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { @@ -3096,7 +3149,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, // $val is a scratch register for our use. 
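The T2I_bincc_irs multiclass above gives Thumb2 predicated two-address forms of AND, ORR, and EOR (t2ANDCC, t2ORRCC, t2EORCC). Because $Rn is tied to $Rd and the instruction only executes when its predicate holds, each behaves as a select between the bitwise result and the untouched input, which is why they live under isCodeGenOnly. The semantics in plain C++ (a sketch of the behavior, not of any LLVM code):

#include <cstdint>

// Modeled on t2ANDCC with RegConstraint<"$Rn = $Rd">: when the condition
// holds, the AND executes; otherwise Rd simply keeps Rn's old value.
uint32_t predicatedAnd(bool CondHolds, uint32_t Rn, uint32_t Operand2) {
  return CondHolds ? (Rn & Operand2) : Rn;
}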
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), @@ -3212,18 +3265,49 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let DecoderMethod = "DecodeThumb2BCCInstruction"; } -// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so +// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so // it goes here. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin version. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in + // IOS version. + let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsDarwin]>; -} + Requires<[IsThumb2, IsIOS]>; +} + +let isCall = 1, + // On non-IOS platforms R9 is callee-saved. + Defs = [LR], Uses = [SP] in { + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def t2BMOVPCB_CALL : tPseudoInst<(outs), + (ins t_bltarget:$func, variable_ops), + 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsThumb, IsNotIOS]>; +} + +let isCall = 1, + // On IOS R9 is call-clobbered. + // R7 is marked as a use to prevent frame-pointer assignments from being + // moved above / below calls. + Defs = [LR], Uses = [R7, SP] in { + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. + def t2BMOVPCBr9_CALL : tPseudoInst<(outs), + (ins t_bltarget:$func, variable_ops), + 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsThumb, IsIOS]>; +} + +// Direct calls +def : T2Pat<(ARMcall_nolink texternalsym:$func), + (t2BMOVPCB_CALL texternalsym:$func)>, + Requires<[IsThumb, IsNotIOS]>; +def : T2Pat<(ARMcall_nolink texternalsym:$func), + (t2BMOVPCBr9_CALL texternalsym:$func)>, + Requires<[IsThumb, IsIOS]>; // IT block let Defs = [ITSTATE] in @@ -4141,6 +4225,37 @@ def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; + +// LDR(literal) w/ alternate [pc, #imm] syntax. +def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + // Version w/ the .w suffix. 
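The t2BMOVPCB_CALL pseudos above implement their comment literally: for a callee marked noreturn, "mov lr, pc; b func" performs the call without a bl, so nothing is pushed onto the hardware return-address stack that will never be popped. A toy model of why one unbalanced push corrupts later predictions (conceptual sketch only, not tied to any particular core):

#include <cstdint>
#include <vector>

// Toy return-address-stack predictor: bl pushes, a return pops and
// predicts. A bl into a noreturn function leaves a stale entry on top,
// so every subsequent return is predicted one frame off.
struct ReturnStackPredictor {
  std::vector<uint64_t> Stack;
  void onBL(uint64_t ReturnAddr) { Stack.push_back(ReturnAddr); }
  bool predictsCorrectly(uint64_t ActualTarget) {
    if (Stack.empty())
      return false;                 // no prediction available
    uint64_t Predicted = Stack.back();
    Stack.pop_back();
    return Predicted == ActualTarget;
  }
};

The pc-relative LDR pseudos and their .w aliases below are unrelated to calls; they give the assembler the alternate "ldr rN, [pc, #imm]" spelling of LDR (literal).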
+def : t2InstAlias<"ldr${p}.w $Rt, $addr", + (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p}.w $Rt, $addr", + (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p}.w $Rt, $addr", + (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p}.w $Rt, $addr", + (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", + (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"add${p} $Rd, pc, $imm", + (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 5d43556..aa10af7 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1,4 +1,4 @@ -//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===// +//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +// The VCVT to/from fixed-point instructions encode the 'fbits' operand +// (the number of fixed bits) differently than it appears in the assembly +// source. It's encoded as "Size - fbits" where Size is the size of the +// fixed-point representation (32 or 16) and fbits is the value appearing +// in the assembly source, an integer in [0,16] or (0,32], depending on size. +def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; } +def fbits32 : Operand<i32> { + let PrintMethod = "printFBits32"; + let ParserMatchClass = fbits32_asm_operand; +} + +def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; } +def fbits16 : Operand<i32> { + let PrintMethod = "printFBits16"; + let ParserMatchClass = fbits16_asm_operand; +} //===----------------------------------------------------------------------===// // Load / store Instructions. @@ -790,127 +806,109 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, // S32 (U=0, sx=1) -> SL // U32 (U=1, sx=1) -> UL -// FIXME: Marking these as codegen only seems wrong. They are real -// instructions(?) -let Constraints = "$a = $dst", isCodeGenOnly = 1 in { +let Constraints = "$a = $dst" in { // FP to Fixed-Point: def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; // Fixed-Point to FP: def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; -} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in' +} // End of 'let Constraints = "$a = $dst" in' //===----------------------------------------------------------------------===// // FP Multiply-Accumulate Operations. @@ -922,7 +920,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -930,7 +928,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -938,10 +936,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -949,7 +947,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -957,7 +955,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -965,10 +963,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -976,7 +974,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -984,7 +982,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -992,10 +990,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1003,14 +1001,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1018,11 +1016,116 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + +//===----------------------------------------------------------------------===// +// Fused FP Multiply-Accumulate Operations. +// +def VFMAD : ADbI<0b11101, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFMSD : ADbI<0b11101, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} + +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,FPContractions]>; + +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,FPContractions]>; +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; //===----------------------------------------------------------------------===// // FP Conditional moves. 
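What separates these VFM* instructions from the VMLA family above is fusion: the multiply feeds the accumulate with no intermediate rounding. That is also why this patch adds NoVFP4 to the VMLA/VMLS patterns: when VFP4 is available and contraction is permitted (the FPContractions predicate), the fused forms should win selection. A standalone C++ demonstration of single versus double rounding, using the C library fma rather than anything from the patch:

#include <cmath>
#include <cstdio>

int main() {
  double x = 1.0 / 3.0;
  double Prod = x * x;                  // the product is rounded here
  double Separate = Prod - Prod;        // exactly 0.0, by construction
  double Fused = std::fma(x, x, -Prod); // x*x computed exactly, rounded
                                        // once: recovers the rounding
                                        // error that Prod absorbed
  std::printf("separate: %g  fused: %.17g\n", Separate, Fused);
  return 0;
}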
@@ -1175,6 +1278,7 @@ def : VFP2MnemonicAlias<"fmrdd", "vmov">;
 def : VFP2MnemonicAlias<"fmrds", "vmov">;
 def : VFP2MnemonicAlias<"fmrrd", "vmov">;
 def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
 def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
 def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
 def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
@@ -1194,6 +1298,12 @@ def : VFP2MnemonicAlias<"fsts", "vstr">;
 def : VFP2MnemonicAlias<"fstd", "vstr">;
 def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
 def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
 def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
@@ -1235,10 +1345,18 @@ def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
 def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
                     (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
-// VMOV can accept optional .f32/.f64 suffix.
-def : VFP2InstAlias<"vmov${p}.f32 $Rt, $Sn",
+// VMOV can accept an optional data type suffix of 32 bits or less.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+                    (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
                     (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
-def : VFP2InstAlias<"vmov${p}.f32 $Sn, $Rt",
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+                    (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
                     (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
 def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 2f97928..7928184 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -1,4 +1,4 @@
-//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6712fb6..0f6dc04 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt.
pass ------------===// // // The LLVM Compiler Infrastructure // @@ -144,7 +144,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::LDMDB; case ARM_AM::ib: return ARM::LDMIB; } - break; case ARM::STRi12: ++NumSTMGened; switch (Mode) { @@ -154,7 +153,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } - break; case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -163,7 +161,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2LDMIA; case ARM_AM::db: return ARM::t2LDMDB; } - break; case ARM::t2STRi8: case ARM::t2STRi12: ++NumSTMGened; @@ -172,7 +169,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2STMIA; case ARM_AM::db: return ARM::t2STMDB; } - break; case ARM::VLDRS: ++NumVLDMGened; switch (Mode) { @@ -180,7 +176,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMSIA; case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. } - break; case ARM::VSTRS: ++NumVSTMGened; switch (Mode) { @@ -188,7 +183,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMSIA; case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } - break; case ARM::VLDRD: ++NumVLDMGened; switch (Mode) { @@ -196,7 +190,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMDIA; case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. } - break; case ARM::VSTRD: ++NumVSTMGened; switch (Mode) { @@ -204,10 +197,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMDIA; case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. } - break; } - - return 0; } namespace llvm { @@ -262,8 +252,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::STMIB_UPD: return ARM_AM::ib; } - - return ARM_AM::bad_am_submode; } } // end namespace ARM_AM @@ -509,50 +497,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, return; } -static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool definesCPSR(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. + return true; + } + + return false; +} + +static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2SUBri && - MI->getOpcode() != ARM::tSUBspi && - MI->getOpcode() != ARM::SUBri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2SUBri: + case ARM::SUBri: + CheckCPSRDef = true; + // fallthrough + case ARM::tSUBspi: + break; + } // Make sure the offset fits in 8 bits. if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 
4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? !definesCPSR(MI) : true; } -static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2ADDri && - MI->getOpcode() != ARM::tADDspi && - MI->getOpcode() != ARM::ADDri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2ADDri: + case ARM::ADDri: + CheckCPSRDef = true; + // fallthrough + case ARM::tADDspi: + break; + } if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? 
!definesCPSR(MI) : true; } static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { @@ -606,7 +628,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::LDMDA_UPD; case ARM_AM::db: return ARM::LDMDB_UPD; } - break; case ARM::STMIA: case ARM::STMDA: case ARM::STMDB: @@ -618,7 +639,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::STMDA_UPD; case ARM_AM::db: return ARM::STMDB_UPD; } - break; case ARM::t2LDMIA: case ARM::t2LDMDB: switch (Mode) { @@ -626,7 +646,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2LDMIA_UPD; case ARM_AM::db: return ARM::t2LDMDB_UPD; } - break; case ARM::t2STMIA: case ARM::t2STMDB: switch (Mode) { @@ -634,38 +653,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2STMIA_UPD; case ARM_AM::db: return ARM::t2STMDB_UPD; } - break; case ARM::VLDMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; case ARM_AM::db: return ARM::VLDMSDB_UPD; } - break; case ARM::VLDMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; case ARM_AM::db: return ARM::VLDMDDB_UPD; } - break; case ARM::VSTMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; case ARM_AM::db: return ARM::VSTMSDB_UPD; } - break; case ARM::VSTMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; case ARM_AM::db: return ARM::VSTMDDB_UPD; } - break; } - - return 0; } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base @@ -786,7 +798,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_PRE; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, @@ -812,7 +823,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_POST; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base @@ -1639,8 +1649,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } - unsigned Opcode = Op->getOpcode(); - if (LastOpcode && Opcode != LastOpcode) + unsigned LSMOpcode + = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); + if (LastOpcode && LSMOpcode != LastOpcode) break; int Offset = getMemoryOpOffset(Op); @@ -1651,7 +1662,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - LastOpcode = Opcode; + LastOpcode = LSMOpcode; if (++NumMove == 8) // FIXME: Tune this limit. 
break; } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index daa126d..e2ac9a4 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); switch (MO.getTargetFlags()) { - default: - assert(0 && "Unknown target flag on symbol operand"); + default: llvm_unreachable("Unknown target flag on symbol operand"); case 0: break; case ARMII::MO_LO16: @@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - return false; + default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all non-CPSR implicit register operands. if (MO.isImplicit() && MO.getReg() != ARM::CPSR) @@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); break; } + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers. + return false; } return true; } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp new file mode 100644 index 0000000..af445e2 --- /dev/null +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMMachineFunctionInfo.h" + +using namespace llvm; + +void ARMFunctionInfo::anchor() { } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 138f0c2..f1c8fc8 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,6 +25,7 @@ namespace llvm { /// ARMFunctionInfo - This class is derived from MachineFunctionInfo and /// contains private ARM-specific information for each MachineFunction. class ARMFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); /// isThumb - True if this function is compiled under Thumb mode. /// Used to initialized Align, so must precede it. @@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPR callee-saved (2) : r8, r10, r11 /// -------------------------------------------- /// DPR callee-saved : d8 - d15 + /// + /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3. + /// Some may be spilled after the stack has been realigned. unsigned GPRCS1Offset; unsigned GPRCS2Offset; unsigned DPRCSOffset; @@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo { BitVector GPRCS2Frames; BitVector DPRCSFrames; + /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in + /// the aligned portion of the stack frame. This is always a contiguous + /// sequence of D-registers starting from d8. 
+ /// + /// We do not keep track of the frame indices used for these registers - they + /// behave like any other frame index in the aligned stack frame. These + /// registers also aren't included in DPRCSSize above. + unsigned NumAlignedDPRCS2Regs; + /// JumpTableUId - Unique id for jumptables. /// unsigned JumpTableUId; @@ -104,6 +117,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), + NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} @@ -137,6 +151,9 @@ public: unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; } + void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; } + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index 18e1620..efa22fb 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===// +//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 1cba1ba..1f83762 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,8 @@ #include "ARMRegisterInfo.h" using namespace llvm; +void ARMRegisterInfo::anchor() { } + ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 8edfb9a..65ed95d 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===// +//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ namespace llvm { class Type; struct ARMRegisterInfo : public ARMBaseRegisterInfo { + virtual void anchor(); public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); }; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 036822d..b16a12c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// +//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { field bits<4> Num; let Namespace = "ARM"; let SubRegs = subregs; + // All bits of ARM registers with sub-registers are covered by sub-registers. 
+ let CoveredBySubRegs = 1; } class ARMFReg<bits<6> num, string n> : Register<n> { @@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> { // Subregister indices. let Namespace = "ARM" in { +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; + // Note: Code depends on these having consecutive numbers. -def ssub_0 : SubRegIndex; -def ssub_1 : SubRegIndex; -def ssub_2 : SubRegIndex; // In a Q reg. -def ssub_3 : SubRegIndex; +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>; +def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>; def dsub_0 : SubRegIndex; def dsub_1 : SubRegIndex; -def dsub_2 : SubRegIndex; -def dsub_3 : SubRegIndex; -def dsub_4 : SubRegIndex; -def dsub_5 : SubRegIndex; -def dsub_6 : SubRegIndex; -def dsub_7 : SubRegIndex; +def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>; +def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>; +def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>; +def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>; +def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>; +def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>; -def qsub_0 : SubRegIndex; -def qsub_1 : SubRegIndex; -def qsub_2 : SubRegIndex; -def qsub_3 : SubRegIndex; - -def qqsub_0 : SubRegIndex; -def qqsub_1 : SubRegIndex; +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; +def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; +// Let TableGen synthesize the remaining 12 ssub_* indices. +// We don't need to name them. } // Integer registers @@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases -let SubRegIndices = [dsub_0, dsub_1], - CompositeIndices = [(ssub_2 dsub_1, ssub_0), - (ssub_3 dsub_1, ssub_1)] in { +let SubRegIndices = [dsub_0, dsub_1] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -150,36 +152,6 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; } -// Pseudo 256-bit registers to represent pairs of Q registers. These should -// never be present in the emitted code. -// These are used for NEON load / store instructions, e.g., vld4, vst3. -// NOTE: It's possible to define more QQ registers since technically the -// starting D register number doesn't have to be multiple of 4, e.g., -// D1, D2, D3, D4 would be a legal quad, but that would make the subregister -// stuff very messy. -let SubRegIndices = [qsub_0, qsub_1], - CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { -def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; -def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; -def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; -def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; -def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; -def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; -def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; -def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; -} - -// Pseudo 512-bit registers to represent four consecutive Q registers. -let SubRegIndices = [qqsub_0, qqsub_1], - CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), - (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), - (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { -def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; -def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; -def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; -def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; -} - // Current Program Status Register. 
def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; @@ -261,6 +233,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { }]; } +// Condition code registers. +def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} + // Scalar single precision floating point register class.. def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; @@ -316,37 +294,98 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], (DPR_8 dsub_0, dsub_1)]; } +// Pseudo-registers representing odd-even pairs of D registers. The even-odd +// pairs are already represented by the Q registers. +// These are needed by NEON instructions requiring two consecutive D registers. +// There is no D31_D0 register as that is always an UNPREDICTABLE encoding. +def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2)]>; + +// Register class representing a pair of consecutive D registers. +// Use the Q registers for the even-odd pairs. +def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> { + // Allocate starting at non-VFP2 registers D16-D31 first. + let AltOrders = [(rotl DPair, 16)]; + let AltOrderSelect = [{ return 1; }]; +} + +// Pseudo-registers representing 3 consecutive D registers. +def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], + [(shl DPR, 0), + (shl DPR, 1), + (shl DPR, 2)]>; + +// 3 consecutive D registers. +def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>; + // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). -def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { +def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQPR, 4)]; + let AltOrders = [(rotl QQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Subset of QQPR that have 32-bit SPR subregs. -def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> { - let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), - (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), - (QPR_VFP2 qsub_0, qsub_1)]; +// Tuples of 4 D regs that isn't also a pair of Q regs. +def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2), + (decimate (shl DPR, 3), 2), + (decimate (shl DPR, 4), 2)]>; -} +// 4 consecutive D registers. +def DQuad : RegisterClass<"ARM", [v4i64], 256, + (interleave Tuples2Q, TuplesOE4D)>; + +// Pseudo 512-bit registers to represent four consecutive Q registers. +def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1], + [(shl QQPR, 0), (shl QQPR, 2)]>; // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). 
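The explicit QQ0-QQ7 and QQQQ0-QQQQ3 defs deleted in the next hunk are superseded by the RegisterTuples mechanism used above: `(decimate (shl DPR, N), 2)` drops the first N D registers and then keeps every second one, so TuplesOE2D zips D1, D3, ..., D31 with D2, D4, ..., D30 into the odd-even pairs D1_D2 through D29_D30 (D31 has no partner, matching the "no D31_D0" comment). A throwaway enumeration, only to make the dag operators concrete:

    #include <cstdio>
    int main() {
      // (decimate (shl DPR, 1), 2) -> D1, D3, ..., D31
      // (decimate (shl DPR, 2), 2) -> D2, D4, ..., D30
      // RegisterTuples zips the lists, truncating at the shorter one.
      for (int d = 1; d <= 29; d += 2)
        std::printf("D%d_D%d\n", d, d + 1);
      return 0;
    }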
-def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { +def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQQQPR, 2)]; + let AltOrders = [(rotl QQQQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Condition code registers. -def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - let isAllocatable = 0; + +// Pseudo-registers representing 2-spaced consecutive D registers. +def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2], + [(shl DPR, 0), + (shl DPR, 2)]>; + +// Spaced pairs of D registers. +def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>; + +def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4)]>; + +// Spaced triples of D registers. +def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. } + +def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4), + (shl DPR, 6)]>; + +// Spaced quads of D registers. +def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 291f3cc..9c32b15 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -1,4 +1,4 @@ -//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===// +//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 958c5c6..45486fd 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -1,10 +1,10 @@ -//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// -// +//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC32 : InstrItinClass; +def IIC_fpFMAC64 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; @@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass; def IIC_VPERMQ3 : InstrItinClass; def IIC_VMACD : InstrItinClass; def IIC_VMACQ : InstrItinClass; +def IIC_VFMACD : InstrItinClass; +def IIC_VFMACQ : InstrItinClass; def IIC_VRECSD : InstrItinClass; def IIC_VRECSQ : InstrItinClass; def IIC_VCNTiD : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index c1880a7..4d959f5 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -1,10 +1,10 @@ -//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM v6 processors. diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 36d58de..e2530d0 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -145,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - // Use default for non AAPCS subtargets - if (!Subtarget->isAAPCS_ABI()) + // Use default for non AAPCS (or Darwin) subtargets + if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) return SDValue(); const ARMTargetLowering &TLI = @@ -189,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, 0, // number of fixed arguments TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv false, // is tail call + false, // does not return false, // is return val used DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), // callee diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2cb5ab9..1bd6f1c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===// +//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -47,7 +47,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasV7Ops(false) , HasVFPv2(false) , HasVFPv3(false) + , HasVFPv4(false) , HasNEON(false) + , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) @@ -103,18 +105,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, computeIssueWidth(); if (TT.find("eabi") != std::string::npos) + // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. + // Darwin-EABI conforms to AACPS but not the rest of EABI. 
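The assignment that follows is what this FIXME qualifies: the ABI is keyed off nothing more than an "eabi" substring in the triple, and choosing AAPCS in turn bumps the stack alignment to 8 bytes a few lines later. A freestanding restatement under invented names (`abiForTriple`, `stackAlignFor`):

    #include <cassert>
    #include <string>
    enum ARMABIKind { ABI_APCS, ABI_AAPCS };
    static ARMABIKind abiForTriple(const std::string &TT) {
      // Any triple containing "eabi" selects AAPCS.
      return TT.find("eabi") != std::string::npos ? ABI_AAPCS : ABI_APCS;
    }
    static unsigned stackAlignFor(ARMABIKind ABI) {
      // AAPCS mandates 8-byte stack alignment; 4 is the subtarget default.
      return ABI == ABI_AAPCS ? 8 : 4;
    }
    int main() {
      assert(abiForTriple("armv7-none-linux-gnueabi") == ABI_AAPCS);
      assert(stackAlignFor(ABI_AAPCS) == 8);
      return 0;
    }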
TargetABI = ARM_ABI_AAPCS; if (isAAPCS_ABI()) stackAlignment = 8; - if (!isTargetDarwin()) + if (!isTargetIOS()) UseMovt = hasV6T2Ops(); else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); - const Triple &T = getTargetTriple(); - SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0); + SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index a35f450..3d9c03d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -1,4 +1,4 @@ -//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====// +//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -45,11 +45,13 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are - /// supported. + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; + bool HasVFPv4; bool HasNEON; + bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to @@ -123,6 +125,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// HasRAS - Some processors perform return stack prediction. CodeGen should + /// avoid issue "normal" call instructions to callees which do not return. + bool HasRAS; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension; @@ -197,7 +203,9 @@ protected: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } + bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } + bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -210,6 +218,7 @@ protected: bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } @@ -218,6 +227,7 @@ protected: const Triple &getTargetTriple() const { return TargetTriple; } + bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NativeClient; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 61b75cb..44229ad 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -34,6 +34,7 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); } + /// TargetMachine ctor - Create an ARM architecture model. 
/// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, @@ -50,6 +51,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMTargetMachine::anchor() { } + ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -74,6 +77,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "support ARM mode execution!"); } +void ThumbTargetMachine::anchor() { } + ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -102,33 +107,60 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM) { - if (getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - PM.add(createGlobalMergePass(getTargetLowering())); +namespace { +/// ARM Code Generator Pass Configuration Options. +class ARMPassConfig : public TargetPassConfig { +public: + ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + ARMBaseTargetMachine &getARMTargetMachine() const { + return getTM<ARMBaseTargetMachine>(); + } + + const ARMSubtarget &getARMSubtarget() const { + return *getARMTargetMachine().getSubtargetImpl(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { + return new ARMPassConfig(this, PM); +} + +bool ARMPassConfig::addPreISel() { + if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) + PM.add(createGlobalMergePass(TM->getTargetLowering())); return false; } -bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createARMISelDag(*this, getOptLevel())); +bool ARMPassConfig::addInstSelector() { + PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } -bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM) { +bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (getOptLevel() != CodeGenOpt::None && !Subtarget.isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && Subtarget.isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) PM.add(createMLxExpansionPass()); return true; } -bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { +bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
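Stepping back from this hunk: the substantive change in ARMTargetMachine.cpp is that the pass hooks (addPreISel, addInstSelector, addPreRegAlloc, addPreSched2, addPreEmitPass) move off ARMBaseTargetMachine onto an ARMPassConfig object returned from createPassConfig(). The shape of that pattern, reduced to a self-contained sketch with all LLVM machinery elided:

    #include <cassert>
    // The base class exposes overridable hook points; the target hands back
    // its own subclass, so pipeline configuration lives with the target.
    struct PassConfigSketch {
      virtual ~PassConfigSketch() {}
      virtual bool addPreISel() { return false; }
      virtual bool addPreSched2() { return false; }
    };
    struct ARMPassConfigSketch : public PassConfigSketch {
      virtual bool addPreSched2() { return true; } // load/store opt, IT blocks
    };
    static PassConfigSketch *createPassConfigSketch() {
      return new ARMPassConfigSketch();
    }
    int main() {
      PassConfigSketch *PC = createPassConfigSketch();
      assert(PC->addPreSched2());
      delete PC;
      return 0;
    }

The FIXME above and the optimization-level guard that resumes below are untouched by the refactor; only their enclosing method moved.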
if (getOptLevel() != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); - if (Subtarget.hasNEON()) + if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -137,22 +169,22 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { PM.add(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) - PM.add(createIfConverterPass()); + if (!getARMSubtarget().isThumb1Only()) + addPass(IfConverterID); } - if (Subtarget.isThumb2()) + if (getARMSubtarget().isThumb2()) PM.add(createThumb2ITBlockPass()); return true; } -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { - if (Subtarget.isThumb2()) { - if (!Subtarget.prefers32BitThumb()) +bool ARMPassConfig::addPreEmitPass() { + if (getARMSubtarget().isThumb2()) { + if (!getARMSubtarget().prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - PM.add(createUnpackMachineBundlesPass()); + addPass(UnpackMachineBundlesID); } PM.add(createARMConstantIslandPass()); @@ -160,8 +192,7 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { return true; } -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index cd77822..abcdb24 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -52,17 +52,15 @@ public: } // Pass Pipeline Configuration - virtual bool addPreISel(PassManagerBase &PM); - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreRegAlloc(PassManagerBase &PM); - virtual bool addPreSched2(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE); }; /// ARMTargetMachine - ARM target machine. /// class ARMTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMELFWriterInfo ELFWriterInfo; @@ -103,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. 
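A note on the `virtual void anchor();` declarations this patch adds to ARMFunctionInfo, ARMRegisterInfo, ARMTargetMachine, and ThumbTargetMachine: defining one deliberately out-of-line virtual function gives each class a single home object file for its vtable instead of a weak copy in every translation unit that uses it. The idiom in isolation:

    // Header side: one virtual member intentionally left undefined inline.
    struct Base {
      virtual void anchor();
      virtual ~Base() {}
    };
    // Exactly one .cpp provides the body, so the vtable (and RTTI) for Base
    // are emitted only in that object file.
    void Base::anchor() {}
    int main() { Base B; (void)B; return 0; }

The InstrInfo member described by the comment above continues directly below.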
OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 721a225..a5ea1c2 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace dwarf; @@ -24,8 +25,9 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); + isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); - if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { + if (isAAPCS_ABI) { StaticCtorSection = getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | @@ -36,7 +38,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); - StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } @@ -46,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } + +const MCSection * +ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); + + if (Priority == 65535) + return StaticCtorSection; + + // Emit ctors in priority order. + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); + + if (Priority == 65535) + return StaticDtorSection; + + // Emit dtors in priority order. + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c6a7261..ff21060 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,6 +20,7 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; + bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -31,6 +32,9 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } + + const MCSection * getStaticCtorSection(unsigned Priority) const; + const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cd86065..2045482 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -44,6 +44,7 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCRegisterInfo *MRI; // Map of register aliases registered via the .req directive.
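The new getStaticCtorSection/getStaticDtorSection overrides above deserve a second look: under AAPCS, the default priority 65535 maps to the plain .init_array/.fini_array section, while an explicit priority is appended to the section name so constructors can be ordered by the static linker. The naming rule restated as a self-contained helper (`ctorSectionName` is a name invented here, not the LLVM API):

    #include <cassert>
    #include <cstdio>
    #include <string>
    static std::string ctorSectionName(unsigned Priority) {
      if (Priority == 65535) // default priority: unsuffixed section
        return ".init_array";
      char Buf[32];
      std::snprintf(Buf, sizeof(Buf), ".init_array.%u", Priority);
      return std::string(Buf); // e.g. ".init_array.101"
    }
    int main() {
      assert(ctorSectionName(65535) == ".init_array");
      assert(ctorSectionName(101) == ".init_array.101");
      return 0;
    }

(The .req alias map named by the comment above is declared on the next line of the hunk.)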
StringMap<unsigned> RegisterReqs; @@ -101,6 +102,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool parseDirectiveSyntax(SMLoc L); bool parseDirectiveReq(StringRef Name, SMLoc L); bool parseDirectiveUnreq(SMLoc L); + bool parseDirectiveArch(SMLoc L); + bool parseDirectiveEabiAttr(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -234,6 +237,9 @@ public: : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + // Cache the MCRegisterInfo. + MRI = &getContext().getRegisterInfo(); + // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -268,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand { k_CoprocReg, k_CoprocOption, k_Immediate, - k_FPImmediate, k_MemBarrierOpt, k_Memory, k_PostIndexRegister, @@ -347,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand { const MCExpr *Val; } Imm; - struct { - unsigned Val; // encoded 8-bit representation - } FPImm; - /// Combined record for all forms of ARM address expressions. struct { unsigned BaseRegNum; @@ -361,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand { ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg unsigned ShiftImm; // shift for OffsetReg. unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (8, 16, or 32) + // n = alignment in bytes (2, 4, 8, 16, or 32) unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) } Memory; @@ -436,9 +437,6 @@ public: case k_Immediate: Imm = o.Imm; break; - case k_FPImmediate: - FPImm = o.FPImm; - break; case k_MemBarrierOpt: MBOpt = o.MBOpt; break; @@ -507,15 +505,10 @@ public: } const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); + assert(isImm() && "Invalid access!"); return Imm.Val; } - unsigned getFPImm() const { - assert(Kind == k_FPImmediate && "Invalid access!"); - return FPImm.Val; - } - unsigned getVectorIndex() const { assert(Kind == k_VectorIndex && "Invalid access!"); return VectorIndex.Val; @@ -544,202 +537,197 @@ public: bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } bool isImm() const { return Kind == k_Immediate; } - bool isFPImm() const { return Kind == k_FPImmediate; } + bool isFPImm() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + return Val != -1; + } + bool isFBits16() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value <= 16; + } + bool isFBits32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 1 && Value <= 32; + } bool isImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } bool isImm0_1020s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && 
Value <= 1020; } bool isImm0_508s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } bool isImm0_255() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } bool isImm0_1() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 2; } bool isImm0_3() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 4; } bool isImm0_7() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 8; } bool isImm0_15() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 16; } bool isImm0_31() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } bool isImm0_63() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 64; } bool isImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 8; } bool isImm16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 16; } bool isImm32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value == 32; } bool isShrImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 8; } bool isShrImm16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 16; } bool isShrImm32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } bool isShrImm64() const { - if (Kind != 
k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 64; } bool isImm1_7() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 8; } bool isImm1_15() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 16; } bool isImm1_31() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 32; } bool isImm1_16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 17; } bool isImm1_32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } bool isImm0_32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 33; } bool isImm0_65535() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } bool isImm0_65535Expr() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // If it's not a constant expression, it'll generate a fixup and be // handled later. 
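The long mechanical run above, which continues below, rewrites every `if (Kind != k_Immediate) return false;` as `if (!isImm()) return false;`. This is more than tidying: the same patch deletes the separate k_FPImmediate kind, so funneling every immediate-ness test through one predicate lets FP immediates (now stored as ordinary immediates) reach the range checks. The distilled pattern:

    #include <cassert>
    // Each isImmN_M()-style predicate goes through isImm() first, then
    // range-checks the constant, mirroring the rewritten methods.
    struct OpSketch {
      enum KindTy { k_Register, k_Immediate } Kind;
      long long Imm;
      bool isImm() const { return Kind == k_Immediate; }
      bool isImm0_255() const { return isImm() && Imm >= 0 && Imm < 256; }
    };
    int main() {
      OpSketch Op = { OpSketch::k_Immediate, 200 };
      assert(Op.isImm0_255());
      return 0;
    }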
@@ -748,88 +736,77 @@ public: return Value >= 0 && Value < 65536; } bool isImm24bit() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value <= 0xffffff; } bool isImmThumbSR() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } bool isPKHLSLImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } bool isPKHASRImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } bool isARMSOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(Value) != -1; } bool isARMSOImmNot() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } bool isARMSOImmNeg() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(-Value) != -1; } bool isT2SOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(Value) != -1; } bool isT2SOImmNot() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(~Value) != -1; } bool isT2SOImmNeg() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(-Value) != -1; } bool isSetEndImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); @@ -858,6 +835,17 @@ public: return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && (alignOK || Memory.Alignment == 0); } + bool isMemPCRelImm12() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Base register must be PC. + if (Memory.BaseRegNum != ARM::PC) + return false; + // Immediate offset in range [-4095, 4095]. 
+ if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + } bool isAlignedMemory() const { return isMemNoOffset(true); } @@ -871,8 +859,7 @@ public: return Val > -4096 && Val < 4096; } bool isAM2OffsetImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; // Immediate offset in range [-4095, 4095]. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -880,6 +867,11 @@ public: return Val > -4096 && Val < 4096; } bool isAddrMode3() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.Alignment != 0) return false; // No shifts are legal for AM3. if (Memory.ShiftType != ARM_AM::no_shift) return false; @@ -906,7 +898,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. - if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; if (!isMemory() || Memory.Alignment != 0) return false; // Check for register offset. @@ -992,6 +984,11 @@ public: return Val >= 0 && Val <= 1020 && (Val % 4) == 0; } bool isMemImm8s4Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [-1020, 1020]. @@ -1010,6 +1007,8 @@ public: bool isMemImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. + if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); @@ -1026,6 +1025,8 @@ public: bool isMemNegImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. + if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, -1]. if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); @@ -1043,7 +1044,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. 
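A recurring subtlety in these memory-operand predicates (isMemPCRelImm12 above is the clearest case): the offset range is symmetric, [-4095, 4095], but INT32_MIN is accepted as well, since it appears to serve as the parser's internal stand-in for "#-0" (the FIXME in addMemPCRelImm12Operands folds it back to 0). Restated standalone:

    #include <cassert>
    #include <cstdint>
    static bool isValidImm12Offset(long long Val) {
      // [-4095, 4095], plus INT32_MIN standing in for "#-0".
      return (Val > -4096 && Val < 4096) || Val == INT32_MIN;
    }
    int main() {
      assert(isValidImm12Offset(-4095));
      assert(!isValidImm12Offset(4096));
      assert(isValidImm12Offset(INT32_MIN));
      return 0;
    }

The label-reference comment just above is the companion rule: a symbolic, non-constant immediate is let through so a later fixup can resolve it.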
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1054,16 +1055,14 @@ public: return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); } bool isPostIdxImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); return (Val > -256 && Val < 256) || (Val == INT32_MIN); } bool isPostIdxImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); @@ -1086,9 +1085,10 @@ public: return VectorList.Count == 1; } - bool isVecListTwoD() const { + bool isVecListDPair() const { if (!isSingleSpacedVectorList()) return false; - return VectorList.Count == 2; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); } bool isVecListThreeD() const { @@ -1106,46 +1106,159 @@ public: return VectorList.Count == 2; } + bool isVecListDPairSpaced() const { + if (!isSingleSpacedVectorList()) return false; + return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID] + .contains(VectorList.RegNum)); + } + + bool isVecListThreeQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced; + } bool isVecListOneDAllLanes() const { - if (Kind != k_VectorListAllLanes) return false; + if (!isSingleSpacedVectorAllLanes()) return false; return VectorList.Count == 1; } bool isVecListTwoDAllLanes() const { - if (Kind != k_VectorListAllLanes) return false; + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 2; + } + + bool isVecListTwoQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; return VectorList.Count == 2; } + bool isVecListThreeDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListThreeQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isVecListFourQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced; + } bool isVecListOneDByteIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 7; } bool isVecListOneDHWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 3; } bool isVecListOneDWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; 
+ if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 1; } bool isVecListTwoDByteIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 7; } bool isVecListTwoDHWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 3; } bool isVecListTwoDWordIndexed() const { - if (Kind != k_VectorListIndexed) return false; + if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } + bool isVecListThreeDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 7; + } + + bool isVecListThreeDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListThreeQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 7; + } + + bool isVecListFourDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1160,8 +1273,7 @@ public: } bool isNEONi8splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1172,8 +1284,7 @@ public: } bool isNEONi16splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1183,8 +1294,7 @@ public: } bool isNEONi32splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. 
if (!CE) return false; @@ -1197,8 +1307,7 @@ public: } bool isNEONi32vmov() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1212,10 +1321,24 @@ public: (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); } + bool isNEONi32vmovNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = ~CE->getValue(); + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, + // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + return (Value >= 0 && Value < 256) || + (Value >= 0x0100 && Value <= 0xff00) || + (Value >= 0x010000 && Value <= 0xff0000) || + (Value >= 0x01000000 && Value <= 0xff000000) || + (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || + (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); + } bool isNEONi64splat() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1341,9 +1464,23 @@ public: addExpr(Inst, getImm()); } + void addFBits16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue())); + } + + void addFBits32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue())); + } + void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + Inst.addOperand(MCOperand::CreateImm(Val)); } void addImm8s4Operands(MCInst &Inst, unsigned N) const { @@ -1446,6 +1583,14 @@ public: Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); } + void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int32_t Imm = Memory.OffsetImm->getValue(); + // FIXME: Handle #-0 + if (Imm == INT32_MIN) Imm = 0; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -1488,6 +1633,16 @@ public: void addAddrMode3Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; if (!Memory.OffsetRegNum) { ARM_AM::AddrOpc AddSub = Val < 0 ? 
ARM_AM::sub : ARM_AM::add; @@ -1551,6 +1706,15 @@ public: void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -1582,7 +1746,7 @@ public: void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1597,7 +1761,7 @@ public: void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1796,6 +1960,20 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = ~CE->getValue(); + if (Value >= 256 && Value <= 0xffff) + Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200); + else if (Value > 0xffff && Value <= 0xffffff) + Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400); + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + void addNEONi64splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. 
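addNEONi32vmovNegOperands above is worth unpacking: it complements the value first (note the `~CE->getValue()`, which lets VMVN-style forms reuse the VMOV machinery), then packs the significant byte together with type bits recording which byte lane it occupies. The packing, lifted into a runnable function (`encodeNEONi32vmov` is an invented name):

    #include <cassert>
    #include <cstdint>
    static unsigned encodeNEONi32vmov(uint32_t Value) {
      if (Value >= 256 && Value <= 0xffff)     // byte in bits [15:8]
        return (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
      if (Value > 0xffff && Value <= 0xffffff) // byte in bits [23:16]
        return (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
      if (Value > 0xffffff)                    // byte in bits [31:24]
        return (Value >> 24) | 0x600;
      return Value;                            // byte in bits [7:0]: as-is
    }
    int main() {
      assert(encodeNEONi32vmov(0x0000AB00) == (0xABu | 0x200));
      assert(encodeNEONi32vmov(0xCD000000) == (0xCDu | 0x600));
      return 0;
    }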
@@ -1964,21 +2142,26 @@ public: } static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + bool isDoubleSpaced, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; Op->StartLoc = S; Op->EndLoc = E; return Op; } static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, - unsigned Index, SMLoc S, SMLoc E) { + unsigned Index, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -2001,14 +2184,6 @@ public: return Op; } - static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARMOperand *Op = new ARMOperand(k_FPImmediate); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - static ARMOperand *CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm, unsigned OffsetRegNum, @@ -2073,10 +2248,6 @@ public: void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { - case k_FPImmediate: - OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm()) - << ") >"; - break; case k_CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; break; @@ -2245,9 +2416,10 @@ int ARMAsmParser::tryParseRegister() { .Default(0); } if (!RegNum) { - // Check for aliases registered via .req. - StringMap<unsigned>::const_iterator Entry = - RegisterReqs.find(Tok.getIdentifier()); + // Check for aliases registered via .req. Canonicalize to lower case. + // That's more consistent since register names are case insensitive, and + // it's how the original entry was passed in from MC/MCParser/AsmParser. + StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase); // If no match, return failure. if (Entry == RegisterReqs.end()) return -1; @@ -2327,6 +2499,10 @@ int ARMAsmParser::tryParseShiftRegister( Error(ImmLoc, "immediate shift value out of range"); return -1; } + // shift by zero is a nop. Always send it through as lsl. + // ('as' compatibility) + if (Imm == 0) + ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); @@ -2385,18 +2561,14 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *ImmVal; if (getParser().ParseExpression(ImmVal)) - return MatchOperand_ParseFail; + return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } + if (!MCE) + return TokError("immediate value expected for vector index"); SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); Parser.Lex(); // Eat right bracket token. @@ -2415,7 +2587,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { // Use the same layout as the tablegen'erated register name matcher. Ugly, // but efficient. 
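The MatchCoprocessorOperandName cleanup (its switch begins just below) moves the failure path into `default: return -1`, which lets every arm return directly and deletes the now-unreachable breaks plus the dead trailing return. The resulting control-flow shape, on a toy scale:

    #include <cassert>
    // All paths return from inside the switch; nothing falls out the bottom.
    static int coprocDigit(char C) {
      switch (C) {
      default: return -1;
      case '0': return 0;
      case '1': return 1;
      case '2': return 2;
      }
    }
    int main() {
      assert(coprocDigit('2') == 2 && coprocDigit('x') == -1);
      return 0;
    }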
switch (Name.size()) { - default: break; + default: return -1; case 2: if (Name[0] != CoprocOp) return -1; @@ -2432,7 +2604,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '8': return 8; case '9': return 9; } - break; case 3: if (Name[0] != CoprocOp || Name[1] != '1') return -1; @@ -2445,10 +2616,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '4': return 14; case '5': return 15; } - break; } - - return -1; } /// parseITCondCode - Try to parse a condition code for an IT instruction. @@ -2568,7 +2736,7 @@ static unsigned getNextRegister(unsigned Reg) { if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) return Reg + 1; switch(Reg) { - default: assert(0 && "Invalid GPR number!"); + default: llvm_unreachable("Invalid GPR number!"); case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2; case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4; case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6; @@ -2737,21 +2905,32 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { Parser.Lex(); // Eat the ']'. return MatchOperand_Success; } - if (Parser.getTok().is(AsmToken::Integer)) { - int64_t Val = Parser.getTok().getIntVal(); - // Make this range check context sensitive for .8, .16, .32. - if (Val < 0 && Val > 7) - Error(Parser.getTok().getLoc(), "lane index out of range"); - Index = Val; - LaneKind = IndexedLane; - Parser.Lex(); // Eat the token; - if (Parser.getTok().isNot(AsmToken::RBrac)) - Error(Parser.getTok().getLoc(), "']' expected"); - Parser.Lex(); // Eat the ']'. - return MatchOperand_Success; + const MCExpr *LaneIndex; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(LaneIndex)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; } - Error(Parser.getTok().getLoc(), "lane index must be empty or an integer"); - return MatchOperand_ParseFail; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex); + if (!CE) { + Error(Loc, "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "']' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the ']'. + int64_t Val = CE->getValue(); + + // FIXME: Make this range check context sensitive for .8, .16, .32. 
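Note what the deleted lines above actually did: `if (Val < 0 && Val > 7)` is satisfied by no integer, and parsing continued even when Error() fired, so out-of-range lane indices slipped through. The replacement picking up below uses the disjunction and fails the match. The fix in miniature:

    #include <cassert>
    static bool laneIndexOutOfRange(long long Val) {
      return Val < 0 || Val > 7; // the old check used &&, which never fires
    }
    int main() {
      assert(laneIndexOutOfRange(8));
      assert(laneIndexOutOfRange(-1));
      assert(!laneIndexOutOfRange(3));
      return 0;
    }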
+ if (Val < 0 || Val > 7) { + Error(Parser.getTok().getLoc(), "lane index out of range"); + return MatchOperand_ParseFail; + } + Index = Val; + LaneKind = IndexedLane; + return MatchOperand_Success; } LaneKind = NoLanes; return MatchOperand_Success; @@ -2776,19 +2955,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Res != MatchOperand_Success) return Res; switch (LaneKind) { - default: - assert(0 && "unexpected lane kind!"); case NoLanes: E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); break; case AllLanes: E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E)); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, + S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, - LaneIndex, S,E)); + LaneIndex, + false, S, E)); break; } return MatchOperand_Success; @@ -2799,19 +2978,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Res != MatchOperand_Success) return Res; switch (LaneKind) { - default: - assert(0 && "unexpected lane kind!"); case NoLanes: E = Parser.getTok().getLoc(); + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E)); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, + S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, - LaneIndex, S,E)); + LaneIndex, + false, S, E)); break; } return MatchOperand_Success; @@ -2959,11 +3141,6 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(EndLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } - if (Spacing == 2 && LaneKind != NoLanes) { - Error(EndLoc, - "lane index specfier invalid in double spaced register list"); - return MatchOperand_ParseFail; - } } SMLoc E = Parser.getTok().getLoc(); @@ -2974,19 +3151,29 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat '}' token. switch (LaneKind) { - default: - assert(0 && "unexpected lane kind in register list."); case NoLanes: + // Non-lane two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ? + &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, (Spacing == 2), S, E)); break; case AllLanes: Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + (Spacing == 2), S, E)); break; case IndexedLane: Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, - LaneIndex, S, E)); + LaneIndex, + (Spacing == 2), + S, E)); break; } return MatchOperand_Success; @@ -3082,14 +3269,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { .Case("faultmask", 19) .Case("control", 20) .Default(~0U); - + if (FlagsVal == ~0U) return MatchOperand_NoMatch; if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; - + Parser.Lex(); // Eat identifier token. 
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); return MatchOperand_Success; @@ -3954,7 +4141,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned Align = 0; switch (CE->getValue()) { default: - return Error(E, "alignment specifier must be 64, 128, or 256 bits"); + return Error(E, + "alignment specifier must be 16, 32, 64, 128, or 256 bits"); + case 16: Align = 2; break; + case 32: Align = 4; break; case 64: Align = 8; break; case 128: Align = 16; break; case 256: Align = 32; break; @@ -4135,6 +4325,15 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Anything that can accept a floating point constant as an operand + // needs to go through here, as the regular ParseExpression is + // integer only. + // + // This routine still creates a generic Immediate operand, containing + // a bitcast of the 64-bit floating point value. The various operands + // that accept floats can check whether the value is valid for them + // via the standard is*() predicates. + SMLoc S = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4165,34 +4364,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); } const AsmToken &Tok = Parser.getTok(); + SMLoc Loc = Tok.getLoc(); if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + APFloat RealVal(APFloat::IEEEsingle, Tok.getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM_AM::getFP64Imm(APInt(64, IntVal)); + IntVal ^= (uint64_t)isNegative << 31; Parser.Lex(); // Eat the token. - if (Val == -1) { - TokError("floating point value out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(IntVal, getContext()), + S, Parser.getTok().getLoc())); return MatchOperand_Success; } + // Also handle plain integers. Instructions which allow floating point + // immediates also allow a raw encoded 8-bit value. if (Tok.is(AsmToken::Integer)) { int64_t Val = Tok.getIntVal(); Parser.Lex(); // Eat the token. if (Val > 255 || Val < 0) { - TokError("encoded floating point value out of range"); + Error(Loc, "encoded floating point value out of range"); return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + double RealVal = ARM_AM::getFPImmFloat(Val); + Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue(); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(Val, getContext()), S, + Parser.getTok().getLoc())); return MatchOperand_Success; } - TokError("invalid floating point immediate"); + Error(Loc, "invalid floating point immediate"); return MatchOperand_ParseFail; } + /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -4215,7 +4419,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - // If this is VMRS, check for the apsr_nzcv operand. 
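// A note on the raw 8-bit form accepted by parseFPImm above: the VFP/NEON
// "fconst"-style immediate packs a value abcdefgh as
//
//     value = (-1)^a * 2^(UInt(NOT(b):c:d) - 3) * (1 + efgh/16)
//
// so, for example, an operand of #112 (0x70) denotes 1.0 and #0 denotes 2.0.
// ARM_AM::getFPImmFloat() performs that expansion, and the result is then
// re-encoded as a 64-bit constant so the usual is*() operand predicates can
// validate it like any other immediate.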
if (!tryParseRegisterWithWriteBack(Operands)) return false; int Res = tryParseShiftRegister(Operands); @@ -4223,6 +4426,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; else if (Res == -1) // irrecoverable error return true; + // If this is VMRS, check for the apsr_nzcv operand. if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { S = Parser.getTok().getLoc(); Parser.Lex(); @@ -4349,7 +4553,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || - Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal") + Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || + Mnemonic == "fmuls") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -4392,7 +4597,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || - Mnemonic == "fsts" || + Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || + Mnemonic == "fmuls" || Mnemonic == "fcmps" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4521,9 +4727,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // // If either register is a high reg, it's either one of the SP // variants (handled above) or a 32-bit encoding, so we just - // check against T3. + // check against T3. If the second register is the PC, this is an + // alternate form of ADR, which uses encoding T4, so check for that too. if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && + static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) return false; // If both registers are low, we're in an IT block, and the immediate is @@ -4891,10 +5099,11 @@ validateInstruction(MCInst &Inst, const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. - // NOTE: In Thumb mode, the BKPT instruction has the interesting property of - // being allowed in IT blocks, but not being predicable. It just always + // NOTE: BKPT instruction has the interesting property of being + // allowed in IT blocks, but not being predicable. It just always // executes. 
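  // For example, given
  //     it   eq
  //     bkpt #0
  // the breakpoint fires even when EQ is false, so the check below exempts
  // both the Thumb and ARM BKPT opcodes from IT-block predication.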
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) { + if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && + Inst.getOpcode() != ARM::BKPT) { unsigned bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; @@ -5034,165 +5243,259 @@ validateInstruction(MCInst &Inst, return false; } -static unsigned getRealVSTLNOpcode(unsigned Opc) { +static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { - default: assert(0 && "unexpected opcode!"); + default: llvm_unreachable("unexpected opcode!"); // VST1LN - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: - return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: - case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: - case ARM::VST1LNdWB_fixed_Asm_U16: - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: - return ARM::VST1LNd8_UPD; - case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: - case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: - case ARM::VST1LNdWB_register_Asm_U16: - return ARM::VST1LNd16_UPD; - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: - return ARM::VST1LNd32_UPD; - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: - case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: - case ARM::VST1LNdAsm_U8: - return ARM::VST1LNd8; - case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: - case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: - return ARM::VST1LNd16; - case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: - case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: - return ARM::VST1LNd32; + case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32; // VST2LN - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: - return ARM::VST2LNd8_UPD; - case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: - case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: - case ARM::VST2LNdWB_fixed_Asm_U16: - return ARM::VST2LNd16_UPD; - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case 
ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
-  case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32:
-    return ARM::VST2LNd32_UPD;
-  case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
-  case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
-  case ARM::VST2LNdWB_register_Asm_U8:
-    return ARM::VST2LNd8_UPD;
-  case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16:
-  case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16:
-  case ARM::VST2LNdWB_register_Asm_U16:
-    return ARM::VST2LNd16_UPD;
-  case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
-  case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
-  case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32:
-    return ARM::VST2LNd32_UPD;
-  case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8:
-  case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8:
-  case ARM::VST2LNdAsm_U8:
-    return ARM::VST2LNd8;
-  case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16:
-  case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
-  case ARM::VST2LNdAsm_U16:
-    return ARM::VST2LNd16;
-  case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
-  case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32:
-  case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32:
-    return ARM::VST2LNd32;
+  case ARM::VST2LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VST2LNd8_UPD;
+  case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+  case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+  case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+  case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+  case ARM::VST2LNdWB_register_Asm_8:  Spacing = 1; return ARM::VST2LNd8_UPD;
+  case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+  case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+  case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+  case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+  case ARM::VST2LNdAsm_8:  Spacing = 1; return ARM::VST2LNd8;
+  case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+  case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+  case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+  case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+  // VST3LN
+  case ARM::VST3LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VST3LNd8_UPD;
+  case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+  case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+  case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+  case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+  case ARM::VST3LNdWB_register_Asm_8:  Spacing = 1; return ARM::VST3LNd8_UPD;
+  case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+  case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+  case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+  case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+  case ARM::VST3LNdAsm_8:  Spacing = 1; return ARM::VST3LNd8;
+  case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+  case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+  case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+  case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
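  // A note on Spacing: the "d" pseudos name a list of consecutive D
  // registers, the "q" pseudos a double-spaced list, e.g.
  //     vst3.16 {d0[1], d1[1], d2[1]}, [r0]   @ single-spaced, Spacing = 1
  //     vst3.16 {d0[1], d2[1], d4[1]}, [r0]   @ double-spaced, Spacing = 2
  // The callers materialize the remaining list entries as FirstReg +
  // Spacing, FirstReg + Spacing * 2, and so on.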
+  // VST3
+  case ARM::VST3dWB_fixed_Asm_8:  Spacing = 1; return ARM::VST3d8_UPD;
+  case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+  case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+  case ARM::VST3qWB_fixed_Asm_8:  Spacing = 2; return ARM::VST3q8_UPD;
+  case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+  case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+  case ARM::VST3dWB_register_Asm_8:  Spacing = 1; return ARM::VST3d8_UPD;
+  case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+  case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+  case ARM::VST3qWB_register_Asm_8:  Spacing = 2; return ARM::VST3q8_UPD;
+  case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+  case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+  case ARM::VST3dAsm_8:  Spacing = 1; return ARM::VST3d8;
+  case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+  case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+  case ARM::VST3qAsm_8:  Spacing = 2; return ARM::VST3q8;
+  case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+  case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+  // VST4LN
+  case ARM::VST4LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VST4LNd8_UPD;
+  case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+  case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+  case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+  case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+  case ARM::VST4LNdWB_register_Asm_8:  Spacing = 1; return ARM::VST4LNd8_UPD;
+  case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+  case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+  case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+  case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+  case ARM::VST4LNdAsm_8:  Spacing = 1; return ARM::VST4LNd8;
+  case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+  case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+  case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+  case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+  // VST4
+  case ARM::VST4dWB_fixed_Asm_8:  Spacing = 1; return ARM::VST4d8_UPD;
+  case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+  case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+  case ARM::VST4qWB_fixed_Asm_8:  Spacing = 2; return ARM::VST4q8_UPD;
+  case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+  case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+  case ARM::VST4dWB_register_Asm_8:  Spacing = 1; return ARM::VST4d8_UPD;
+  case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+  case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+  case ARM::VST4qWB_register_Asm_8:  Spacing = 2; return ARM::VST4q8_UPD;
+  case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+  case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+  case ARM::VST4dAsm_8:  Spacing = 1; return ARM::VST4d8;
+  case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+  case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+  case ARM::VST4qAsm_8:  Spacing = 2; return ARM::VST4q8;
+  case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+  case ARM::VST4qAsm_32: Spacing = 2; return
ARM::VST4q32; } } -static unsigned getRealVLDLNOpcode(unsigned Opc) { +static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { switch(Opc) { - default: assert(0 && "unexpected opcode!"); + default: llvm_unreachable("unexpected opcode!"); // VLD1LN - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: - case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: - case ARM::VLD1LNdWB_fixed_Asm_U16: - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: - return ARM::VLD1LNd8_UPD; - case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: - case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: - case ARM::VLD1LNdWB_register_Asm_U16: - return ARM::VLD1LNd16_UPD; - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: - return ARM::VLD1LNd32_UPD; - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: - case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: - case ARM::VLD1LNdAsm_U8: - return ARM::VLD1LNd8; - case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: - case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: - return ARM::VLD1LNd16; - case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: - case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: - return ARM::VLD1LNd32; + case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32; // VLD2LN - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: - return ARM::VLD2LNd8_UPD; - case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: - case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: - case ARM::VLD2LNdWB_fixed_Asm_U16: - return ARM::VLD2LNd16_UPD; - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: - return ARM::VLD2LNd32_UPD; - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case 
ARM::VLD2LNdWB_register_Asm_S8:
-  case ARM::VLD2LNdWB_register_Asm_U8:
-    return ARM::VLD2LNd8_UPD;
-  case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16:
-  case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16:
-  case ARM::VLD2LNdWB_register_Asm_U16:
-    return ARM::VLD2LNd16_UPD;
-  case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
-  case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
-  case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32:
-    return ARM::VLD2LNd32_UPD;
-  case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8:
-  case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8:
-  case ARM::VLD2LNdAsm_U8:
-    return ARM::VLD2LNd8;
-  case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16:
-  case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
-  case ARM::VLD2LNdAsm_U16:
-    return ARM::VLD2LNd16;
-  case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
-  case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32:
-  case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32:
-    return ARM::VLD2LNd32;
+  case ARM::VLD2LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD2LNd8_UPD;
+  case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+  case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+  case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+  case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+  case ARM::VLD2LNdWB_register_Asm_8:  Spacing = 1; return ARM::VLD2LNd8_UPD;
+  case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+  case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+  case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+  case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+  case ARM::VLD2LNdAsm_8:  Spacing = 1; return ARM::VLD2LNd8;
+  case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+  case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+  case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+  case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+  // VLD3DUP
+  case ARM::VLD3DUPdWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD3DUPd8_UPD;
+  case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+  case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+  case ARM::VLD3DUPqWB_fixed_Asm_8:  Spacing = 2; return ARM::VLD3DUPq8_UPD;
+  case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+  case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+  case ARM::VLD3DUPdWB_register_Asm_8:  Spacing = 1; return ARM::VLD3DUPd8_UPD;
+  case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+  case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+  case ARM::VLD3DUPqWB_register_Asm_8:  Spacing = 2; return ARM::VLD3DUPq8_UPD;
+  case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+  case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+  case ARM::VLD3DUPdAsm_8:  Spacing = 1; return ARM::VLD3DUPd8;
+  case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+  case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+  case ARM::VLD3DUPqAsm_8:  Spacing = 2; return ARM::VLD3DUPq8;
+  case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+  case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+  // VLD3LN
+  case ARM::VLD3LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD3LNd8_UPD;
+  case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+  case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+  case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+  case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+  case ARM::VLD3LNdWB_register_Asm_8:  Spacing = 1; return ARM::VLD3LNd8_UPD;
+  case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+  case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+  case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+  case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+  case ARM::VLD3LNdAsm_8:  Spacing = 1; return ARM::VLD3LNd8;
+  case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+  case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+  case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+  case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+  // VLD3
+  case ARM::VLD3dWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD3d8_UPD;
+  case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+  case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+  case ARM::VLD3qWB_fixed_Asm_8:  Spacing = 2; return ARM::VLD3q8_UPD;
+  case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+  case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+  case ARM::VLD3dWB_register_Asm_8:  Spacing = 1; return ARM::VLD3d8_UPD;
+  case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+  case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+  case ARM::VLD3qWB_register_Asm_8:  Spacing = 2; return ARM::VLD3q8_UPD;
+  case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+  case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+  case ARM::VLD3dAsm_8:  Spacing = 1; return ARM::VLD3d8;
+  case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+  case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+  case ARM::VLD3qAsm_8:  Spacing = 2; return ARM::VLD3q8;
+  case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+  case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+  // VLD4LN
+  case ARM::VLD4LNdWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD4LNd8_UPD;
+  case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+  case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+  case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+  case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+  case ARM::VLD4LNdWB_register_Asm_8:  Spacing = 1; return ARM::VLD4LNd8_UPD;
+  case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+  case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+  case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+  case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+  case ARM::VLD4LNdAsm_8:  Spacing = 1; return ARM::VLD4LNd8;
+  case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+  case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+  case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+  case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+  // VLD4DUP
+  case ARM::VLD4DUPdWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD4DUPd8_UPD;
+  case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+  case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+  case ARM::VLD4DUPqWB_fixed_Asm_8:  Spacing = 2; return ARM::VLD4DUPq8_UPD;
+  case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+  case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+  case ARM::VLD4DUPdWB_register_Asm_8:  Spacing = 1; return ARM::VLD4DUPd8_UPD;
+  case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+  case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+  case ARM::VLD4DUPqWB_register_Asm_8:  Spacing = 2; return ARM::VLD4DUPq8_UPD;
+  case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+  case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+  case ARM::VLD4DUPdAsm_8:  Spacing = 1; return ARM::VLD4DUPd8;
+  case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+  case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+  case ARM::VLD4DUPqAsm_8:  Spacing = 2; return ARM::VLD4DUPq8;
+  case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+  case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+  // VLD4
+  case ARM::VLD4dWB_fixed_Asm_8:  Spacing = 1; return ARM::VLD4d8_UPD;
+  case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+  case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+  case ARM::VLD4qWB_fixed_Asm_8:  Spacing = 2; return ARM::VLD4q8_UPD;
+  case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+  case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+  case ARM::VLD4dWB_register_Asm_8:  Spacing = 1; return ARM::VLD4d8_UPD;
+  case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+  case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+  case ARM::VLD4qWB_register_Asm_8:  Spacing = 2; return ARM::VLD4q8_UPD;
+  case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+  case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+  case ARM::VLD4dAsm_8:  Spacing = 1; return ARM::VLD4d8;
+  case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+  case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+  case ARM::VLD4qAsm_8:  Spacing = 2; return ARM::VLD4q8;
+  case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+  case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
  }
}

@@ -5200,24 +5503,86 @@ bool ARMAsmParser::
 processInstruction(MCInst &Inst,
                    const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   switch (Inst.getOpcode()) {
+  // Aliases for alternate PC+imm syntax of LDR instructions.
+  case ARM::t2LDRpcrel:
+    Inst.setOpcode(ARM::t2LDRpci);
+    return true;
+  case ARM::t2LDRBpcrel:
+    Inst.setOpcode(ARM::t2LDRBpci);
+    return true;
+  case ARM::t2LDRHpcrel:
+    Inst.setOpcode(ARM::t2LDRHpci);
+    return true;
+  case ARM::t2LDRSBpcrel:
+    Inst.setOpcode(ARM::t2LDRSBpci);
+    return true;
+  case ARM::t2LDRSHpcrel:
+    Inst.setOpcode(ARM::t2LDRSHpci);
+    return true;
   // Handle NEON VST complex aliases.
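  // Each case below rewrites one assembler-only pseudo into its real
  // MCInst form: getRealVSTOpcode() supplies the real opcode and the
  // register spacing, the writeback/address operands are copied over
  // first, and the register list is rebuilt from the first D register.
  // In outline (illustrative only; the cases below keep each operand
  // order explicit rather than sharing a helper):
  //
  //   unsigned Spacing;
  //   TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
  //   // ... writeback, base register, alignment, offset operands ...
  //   for (unsigned i = 0; i != NumRegs; ++i)  // NumRegs: list length
  //     TmpInst.addOperand(MCOperand::CreateReg(
  //         Inst.getOperand(0).getReg() + Spacing * i));
  //   // ... lane index and predicate operands ...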
- case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: - case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: - case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: - case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: - case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: - case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: - case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: - case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + case ARM::VST1LNdWB_register_Asm_8: + case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: + case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_32: + case ARM::VST2LNqWB_register_Asm_16: + case ARM::VST2LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdWB_register_Asm_8: + case ARM::VST3LNdWB_register_Asm_16: + case ARM::VST3LNdWB_register_Asm_32: + case ARM::VST3LNqWB_register_Asm_16: + case ARM::VST3LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5225,42 +5590,42 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: - case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: - case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: - case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: - case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: - case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: - case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: - case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: { + case ARM::VST4LNdWB_register_Asm_8: + case ARM::VST4LNdWB_register_Asm_16: + case ARM::VST4LNdWB_register_Asm_32: + case ARM::VST4LNqWB_register_Asm_16: + case ARM::VST4LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); Inst = TmpInst; return true; } - case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: - case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: - case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: - case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: - case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: - case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: - case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: - case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + + case ARM::VST1LNdWB_fixed_Asm_8: + case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment @@ -5273,43 +5638,49 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: - case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: - case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: - case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: - case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: - case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: - case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: - case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: { + case ARM::VST2LNdWB_fixed_Asm_8: + case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_32: + case ARM::VST2LNqWB_fixed_Asm_16: + case ARM::VST2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } - case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: - case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: - case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: - case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: - case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: - case ARM::VST1LNdAsm_U32: { + + case ARM::VST3LNdWB_fixed_Asm_8: + case ARM::VST3LNdWB_fixed_Asm_16: + case ARM::VST3LNdWB_fixed_Asm_32: + case ARM::VST3LNqWB_fixed_Asm_16: + case ARM::VST3LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5317,45 +5688,166 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: - case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: - case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: - case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: - case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: - case ARM::VST2LNdAsm_U32: { + case ARM::VST4LNdWB_fixed_Asm_8: + case ARM::VST4LNdWB_fixed_Asm_16: + case ARM::VST4LNdWB_fixed_Asm_32: + case ARM::VST4LNqWB_fixed_Asm_16: + case ARM::VST4LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } + + case ARM::VST1LNdAsm_8: + case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: + case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_32: + case ARM::VST2LNqAsm_16: + case ARM::VST2LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdAsm_8: + case ARM::VST3LNdAsm_16: + case ARM::VST3LNdAsm_32: + case ARM::VST3LNqAsm_16: + case ARM::VST3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST4LNdAsm_8: + case ARM::VST4LNdAsm_16: + case ARM::VST4LNdAsm_32: + case ARM::VST4LNqAsm_16: + case ARM::VST4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. - case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: - case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: - case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: - case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: - case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: - case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: - case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: - case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + case ARM::VLD1LNdWB_register_Asm_8: + case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: + case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_32: + case ARM::VLD2LNqWB_register_Asm_16: + case ARM::VLD2LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5363,26 +5855,30 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: - case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: - case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: - case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: - case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: - case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: - case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: - case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: { + case ARM::VLD3LNdWB_register_Asm_8: + case ARM::VLD3LNdWB_register_Asm_16: + case ARM::VLD3LNdWB_register_Asm_32: + case ARM::VLD3LNqWB_register_Asm_16: + case ARM::VLD3LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
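  // (As with the other VLD*LN expansions, the destination registers are
  // re-added after the address operands as tied sources: a lane load
  // writes only one lane, e.g.
  //     vld3.16 {d0[2], d1[2], d2[2]}, [r0]
  // leaves the other lanes of d0-d2 intact, so the instruction also
  // reads the previous register values.)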
- TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(4)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(5)); // CondCode TmpInst.addOperand(Inst.getOperand(6)); @@ -5390,18 +5886,49 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: - case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: - case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: - case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: - case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: - case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: - case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: - case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + case ARM::VLD4LNdWB_register_Asm_8: + case ARM::VLD4LNdWB_register_Asm_16: + case ARM::VLD4LNdWB_register_Asm_32: + case ARM::VLD4LNqWB_register_Asm_16: + case ARM::VLD4LNqWB_register_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: + case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn @@ -5415,26 +5942,111 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: - case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: - case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: - case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: - case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: - case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: - case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: - case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: { + case ARM::VLD2LNdWB_fixed_Asm_8: + case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_32: + case ARM::VLD2LNqWB_fixed_Asm_16: + case ARM::VLD2LNqWB_fixed_Asm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3LNdWB_fixed_Asm_8: + case ARM::VLD3LNdWB_fixed_Asm_16: + case ARM::VLD3LNdWB_fixed_Asm_32: + case ARM::VLD3LNqWB_fixed_Asm_16: + case ARM::VLD3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4LNdWB_fixed_Asm_8: + case ARM::VLD4LNdWB_fixed_Asm_16: + case ARM::VLD4LNdWB_fixed_Asm_32: + case ARM::VLD4LNqWB_fixed_Asm_16: + case ARM::VLD4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: + case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5442,20 +6054,24 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: - case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: - case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: - case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: - case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: - case ARM::VLD1LNdAsm_U32: { + case ARM::VLD2LNdAsm_8: + case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_32: + case ARM::VLD2LNqAsm_16: + case ARM::VLD2LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. - TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); @@ -5463,29 +6079,549 @@ processInstruction(MCInst &Inst, return true; } - case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: - case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: - case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: - case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: - case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: - case ARM::VLD2LNdAsm_U32: { + case ARM::VLD3LNdAsm_8: + case ARM::VLD3LNdAsm_16: + case ARM::VLD3LNdAsm_32: + case ARM::VLD3LNqAsm_16: + case ARM::VLD3LNqAsm_32: { MCInst TmpInst; // Shuffle the operands around so the lane index operand is in the // right place. 
- TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); TmpInst.addOperand(Inst.getOperand(0)); // Vd - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(2)); // Rn TmpInst.addOperand(Inst.getOperand(3)); // alignment TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) - TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); TmpInst.addOperand(Inst.getOperand(1)); // lane TmpInst.addOperand(Inst.getOperand(4)); // CondCode TmpInst.addOperand(Inst.getOperand(5)); Inst = TmpInst; return true; } + + case ARM::VLD4LNdAsm_8: + case ARM::VLD4LNdAsm_16: + case ARM::VLD4LNdAsm_32: + case ARM::VLD4LNqAsm_16: + case ARM::VLD4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3DUP single 3-element structure to all lanes instructions. 
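// These aliases accept source forms such as (registers illustrative):
//   vld3.8 {d0[], d1[], d2[]}, [r0]      @ no writeback
//   vld3.8 {d0[], d1[], d2[]}, [r0]!     @ _fixed writeback
//   vld3.8 {d0[], d1[], d2[]}, [r0], r1  @ _register writeback
// The q variants name every-other register, e.g. {d0[], d2[], d4[]}.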
+ case ARM::VLD3DUPdAsm_8: + case ARM::VLD3DUPdAsm_16: + case ARM::VLD3DUPdAsm_32: + case ARM::VLD3DUPqAsm_8: + case ARM::VLD3DUPqAsm_16: + case ARM::VLD3DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_fixed_Asm_8: + case ARM::VLD3DUPdWB_fixed_Asm_16: + case ARM::VLD3DUPdWB_fixed_Asm_32: + case ARM::VLD3DUPqWB_fixed_Asm_8: + case ARM::VLD3DUPqWB_fixed_Asm_16: + case ARM::VLD3DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_register_Asm_8: + case ARM::VLD3DUPdWB_register_Asm_16: + case ARM::VLD3DUPdWB_register_Asm_32: + case ARM::VLD3DUPqWB_register_Asm_8: + case ARM::VLD3DUPqWB_register_Asm_16: + case ARM::VLD3DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3 multiple 3-element structure instructions. 
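// e.g. vld3.16 {d0, d1, d2}, [r0], plus [r0]! and [r0], r2 for the
// writeback forms; q variants again use spaced lists such as {d0, d2, d4}.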
+ case ARM::VLD3dAsm_8: + case ARM::VLD3dAsm_16: + case ARM::VLD3dAsm_32: + case ARM::VLD3qAsm_8: + case ARM::VLD3qAsm_16: + case ARM::VLD3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_fixed_Asm_8: + case ARM::VLD3dWB_fixed_Asm_16: + case ARM::VLD3dWB_fixed_Asm_32: + case ARM::VLD3qWB_fixed_Asm_8: + case ARM::VLD3qWB_fixed_Asm_16: + case ARM::VLD3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3dWB_register_Asm_8: + case ARM::VLD3dWB_register_Asm_16: + case ARM::VLD3dWB_register_Asm_32: + case ARM::VLD3qWB_register_Asm_8: + case ARM::VLD3qWB_register_Asm_16: + case ARM::VLD3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4DUP single 4-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8: + case ARM::VLD4DUPdAsm_16: + case ARM::VLD4DUPdAsm_32: + case ARM::VLD4DUPqAsm_8: + case ARM::VLD4DUPqAsm_16: + case ARM::VLD4DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_fixed_Asm_8: + case ARM::VLD4DUPdWB_fixed_Asm_16: + case ARM::VLD4DUPdWB_fixed_Asm_32: + case ARM::VLD4DUPqWB_fixed_Asm_8: + case ARM::VLD4DUPqWB_fixed_Asm_16: + case ARM::VLD4DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_register_Asm_8: + case ARM::VLD4DUPdWB_register_Asm_16: + case ARM::VLD4DUPdWB_register_Asm_32: + case ARM::VLD4DUPqWB_register_Asm_8: + case ARM::VLD4DUPqWB_register_Asm_16: + case ARM::VLD4DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4 multiple 4-element structure instructions. 
+ case ARM::VLD4dAsm_8: + case ARM::VLD4dAsm_16: + case ARM::VLD4dAsm_32: + case ARM::VLD4qAsm_8: + case ARM::VLD4qAsm_16: + case ARM::VLD4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_fixed_Asm_8: + case ARM::VLD4dWB_fixed_Asm_16: + case ARM::VLD4dWB_fixed_Asm_32: + case ARM::VLD4qWB_fixed_Asm_8: + case ARM::VLD4qWB_fixed_Asm_16: + case ARM::VLD4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_register_Asm_8: + case ARM::VLD4dWB_register_Asm_16: + case ARM::VLD4dWB_register_Asm_32: + case ARM::VLD4qWB_register_Asm_8: + case ARM::VLD4qWB_register_Asm_16: + case ARM::VLD4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST3 multiple 3-element structure instructions. 
+ case ARM::VST3dAsm_8: + case ARM::VST3dAsm_16: + case ARM::VST3dAsm_32: + case ARM::VST3qAsm_8: + case ARM::VST3qAsm_16: + case ARM::VST3qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_fixed_Asm_8: + case ARM::VST3dWB_fixed_Asm_16: + case ARM::VST3dWB_fixed_Asm_32: + case ARM::VST3qWB_fixed_Asm_8: + case ARM::VST3qWB_fixed_Asm_16: + case ARM::VST3qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST3dWB_register_Asm_8: + case ARM::VST3dWB_register_Asm_16: + case ARM::VST3dWB_register_Asm_32: + case ARM::VST3qWB_register_Asm_8: + case ARM::VST3qWB_register_Asm_16: + case ARM::VST3qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST4 multiple 4-element structure instructions.
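// e.g. vst4.8 {d0, d1, d2, d3}, [r0]; spaced q-variant lists such as
// {d0, d2, d4, d6} fall out of the same Spacing-based expansion.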
+ case ARM::VST4dAsm_8: + case ARM::VST4dAsm_16: + case ARM::VST4dAsm_32: + case ARM::VST4qAsm_8: + case ARM::VST4qAsm_16: + case ARM::VST4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_fixed_Asm_8: + case ARM::VST4dWB_fixed_Asm_16: + case ARM::VST4dWB_fixed_Asm_32: + case ARM::VST4qWB_fixed_Asm_8: + case ARM::VST4qWB_fixed_Asm_16: + case ARM::VST4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_register_Asm_8: + case ARM::VST4dWB_register_Asm_16: + case ARM::VST4dWB_register_Asm_32: + case ARM::VST4qWB_register_Asm_8: + case ARM::VST4qWB_register_Asm_16: + case ARM::VST4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. + case ARM::t2MOVsr: + case ARM::t2MOVSsr: { + // Which instruction to expand to depends on the CCOut operand and + // whether we're in an IT block if the register operands are low + // registers. + bool isNarrow = false; + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + isNarrow = true; + MCInst TmpInst; + unsigned newOpc; + switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? 
ARM::tASRrr : ARM::t2ASRrr; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break; + case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break; + } + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + if (isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + if (!isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0)); + Inst = TmpInst; + return true; + } case ARM::t2MOVsi: case ARM::t2MOVSsi: { // Which instruction to expand to depends on the CCOut operand and @@ -5504,6 +6640,7 @@ processInstruction(MCInst &Inst, case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; } unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); if (Ammount == 32) Ammount = 0; @@ -5513,7 +6650,8 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(MCOperand::CreateReg( Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); TmpInst.addOperand(Inst.getOperand(1)); // Rn - TmpInst.addOperand(MCOperand::CreateImm(Ammount)); + if (newOpc != ARM::t2RRX) + TmpInst.addOperand(MCOperand::CreateImm(Ammount)); TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); if (!isNarrow) @@ -5535,7 +6673,6 @@ processInstruction(MCInst &Inst, case ARM::LSLr: ShiftTy = ARM_AM::lsl; break; case ARM::RORr: ShiftTy = ARM_AM::ror; break; } - // A shift by zero is a plain MOVr, not a MOVsi. unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0); MCInst TmpInst; TmpInst.setOpcode(ARM::MOVsr); @@ -5881,6 +7018,57 @@ processInstruction(MCInst &Inst, } break; } + case ARM::MOVsi: { + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); + if (SOpc == ARM_AM::rrx) return false; + if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) { + // Shifting by zero is accepted as a vanilla 'MOVr' + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ANDrsi: + case ARM::ORRrsi: + case ARM::EORrsi: + case ARM::BICrsi: + case ARM::SUBrsi: + case ARM::ADDrsi: { + unsigned newOpc; + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm()); + if (SOpc == ARM_AM::rrx) return false; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ANDrsi: newOpc = ARM::ANDrr; break; + case ARM::ORRrsi: newOpc = ARM::ORRrr; break; + case ARM::EORrsi: newOpc = ARM::EORrr; break; + case ARM::BICrsi: newOpc = ARM::BICrr; break; + case ARM::SUBrsi: newOpc = ARM::SUBrr; break; + case ARM::ADDrsi: newOpc = ARM::ADDrr; break; + } + // If the shift is by zero, use the non-shifted instruction definition. 
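// e.g. 'and r0, r1, r2, lsl #0' performs no shift at all, so it is
// emitted as the plain register-register form (ANDrr).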
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) { + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ITasm: case ARM::t2IT: { // The mask bits for all but the first condition are represented as // the low bit of the condition code value implies 't'. We currently @@ -5987,6 +7175,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, // block. forwardITPosition(); + // ITasm is an ARM mode pseudo-instruction that just sets the IT block state and + // doesn't actually encode. + if (Inst.getOpcode() == ARM::ITasm) + return false; + + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; case Match_MissingFeature: @@ -6020,7 +7214,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } llvm_unreachable("Implement any new match types added!"); - return true; } /// parseDirective parses the arm specific directives @@ -6040,6 +7233,10 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") return parseDirectiveUnreq(DirectiveID.getLoc()); + else if (IDVal == ".arch") + return parseDirectiveArch(DirectiveID.getLoc()); + else if (IDVal == ".eabi_attribute") + return parseDirectiveEabiAttr(DirectiveID.getLoc()); return true; } @@ -6100,23 +7297,32 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); bool isMachO = MAI.hasSubsectionsViaSymbols(); StringRef Name; + bool needFuncName = true; - // Darwin asm has function name after .thumb_func direction + // Darwin asm optionally has a function name after the .thumb_func directive // ELF doesn't if (isMachO) { const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) - return Error(L, "unexpected token in .thumb_func directive"); - Name = Tok.getIdentifier(); - Parser.Lex(); // Consume the identifier token. + if (Tok.isNot(AsmToken::EndOfStatement)) { + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) + return Error(L, "unexpected token in .thumb_func directive"); + Name = Tok.getIdentifier(); + Parser.Lex(); // Consume the identifier token. + needFuncName = false; + } } - if (getLexer().isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(L, "unexpected token in directive"); - Parser.Lex(); + + // Eat the end of statement and any blank lines that follow. + while (getLexer().is(AsmToken::EndOfStatement)) + Parser.Lex(); // FIXME: assuming function name will be the line following .thumb_func - if (!isMachO) { + // We really should be checking the next symbol definition even if there's + // stuff in between. + if (needFuncName) { Name = Parser.getTok().getIdentifier(); } @@ -6219,6 +7425,18 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { return false; } +/// parseDirectiveArch +/// ::= .arch token +bool ARMAsmParser::parseDirectiveArch(SMLoc L) { + return true; +} + +/// parseDirectiveEabiAttr +/// ::= .eabi_attribute int, int +bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { + return true; +} + extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization.
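The t2MOVsr/t2MOVSsr expansion earlier in this file narrows to a 16-bit shift encoding only under fairly strict conditions. Restated as a standalone predicate, a sketch (isARMLowRegister and the MCInst accessors are the same helpers the surrounding code uses; the function name is invented for illustration):

static bool canUseNarrowShift(const MCInst &Inst, bool InITBlock) {
  // The 16-bit tASRrr/tLSRrr/tLSLrr/tRORrr encodings are destructive
  // (Rd == Rn), take low registers only, and set flags exactly when
  // executed outside an IT block. So the flag-setting t2MOVSsr can
  // narrow only outside an IT block, and the non-flag-setting t2MOVsr
  // only inside one.
  bool SetsFlags = Inst.getOpcode() == ARM::t2MOVSsr;
  return isARMLowRegister(Inst.getOperand(0).getReg()) &&  // Rd
         isARMLowRegister(Inst.getOperand(1).getReg()) &&  // Rn
         isARMLowRegister(Inst.getOperand(2).getReg()) &&  // Rm
         Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
         InITBlock == !SetsFlags;
}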
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 04cdf55..9a2aab5 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp + ARMMachineFunctionInfo.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 49c64fd..4101f59 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1,4 +1,4 @@ -//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// +//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===// // // The LLVM Compiler Infrastructure // @@ -52,7 +52,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: }; @@ -77,7 +77,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: mutable std::vector<unsigned> ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -97,7 +97,7 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { Out = In; return false; } - return false; + llvm_unreachable("Invalid DecodeStatus!"); } @@ -126,6 +126,11 @@ static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, const void *Decoder); static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); @@ -332,11 +337,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -EDInstInfo *ARMDisassembler::getEDInfo() const { +const EDInstInfo *ARMDisassembler::getEDInfo() const { return instInfoARM; } -EDInstInfo *ThumbDisassembler::getEDInfo() const { +const EDInstInfo *ThumbDisassembler::getEDInfo() const { return instInfoARM; } @@ -440,40 +445,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, MCInst &MI, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); - if (!getOpInfo) - return false; - struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; void *DisInfo = Dis->getDisInfoBlock(); - if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - LLVMSymbolLookupCallback SymbolLookUp = - Dis->getLLVMSymbolLookupCallback(); - if (SymbolLookUp) { - uint64_t ReferenceType; - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - const char *ReferenceName; - const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = true; - SymbolicOp.Value = 0; - } - else { - SymbolicOp.Value = Value; - } - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*Dis->CommentStream) 
<< "symbol stub for: " << ReferenceName; - } - else { - return false; - } - } - else { + + if (!getOpInfo || + !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + // Clear SymbolicOp.Value from above and also all other fields. + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (!SymbolLookUp) return false; + uint64_t ReferenceType; + if (isBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; } + // For branches always create an MCExpr so it gets printed as hex address. + else if (isBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + if (!Name && !isBranch) + return false; } MCContext *Ctx = Dis->getMCContext(); @@ -533,7 +536,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) MI.addOperand(MCOperand::CreateExpr(Expr)); else - assert(0 && "bad SymbolicOp.VariantKind"); + llvm_unreachable("bad SymbolicOp.VariantKind"); return true; } @@ -548,7 +551,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, /// a literal 'C' string if the referenced address of the literal pool's entry /// is an address into a section with 'C' string literals. static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, - const void *Decoder) { + const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); if (SymbolLookUp) { @@ -989,6 +992,48 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static const unsigned DPairDecoderTable[] = { + ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, + ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, + ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, + ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24, + ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30, + ARM::Q15 +}; + +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + + unsigned Register = DPairDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static const unsigned DPairSpacedDecoderTable[] = { + ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5, + ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9, + ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13, + ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17, + ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21, + ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25, + ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29, + ARM::D28_D30, ARM::D29_D31 +}; + +static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 29) + return MCDisassembler::Fail; + + unsigned Register = 
DPairSpacedDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; @@ -1910,12 +1955,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, if (pred == 0xF) { Inst.setOpcode(ARM::BLXi); imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); return S; } - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, - 4, Inst, Decoder)) + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) return MCDisassembler::Fail; @@ -1953,8 +2000,47 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); // First output register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2b8: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second output register switch (Inst.getOpcode()) { @@ -2285,8 +2371,47 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // First input register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8: + case ARM::VST1q16wb_fixed: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_fixed: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_fixed: + case ARM::VST1q64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8: + case ARM::VST2d16wb_fixed: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_fixed: + case ARM::VST2d32wb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST2b16: + case ARM::VST2b32: + case ARM::VST2b8: + case ARM::VST2b16wb_fixed: + 
case ARM::VST2b16wb_register: + case ARM::VST2b32wb_fixed: + case ARM::VST2b32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second input register switch (Inst.getOpcode()) { @@ -2652,8 +2777,16 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VTBL2: + case ARM::VTBX2: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -3127,7 +3260,9 @@ DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); return MCDisassembler::Success; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 662097a..bae4e78 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -35,8 +36,9 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI) { + MCInstPrinter(MAI, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } @@ -436,6 +438,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, Op, O); + return; + } + const MCOperand &MO3 = MI->getOperand(Op+2); unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); @@ -639,7 +647,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (getAvailableFeatures() & ARM::FeatureMClass) { switch (Op.getImm()) { - default: assert(0 && "Unexpected mask value!"); + default: llvm_unreachable("Unexpected mask value!"); case 0: O << "apsr"; return; case 1: O << "iapsr"; return; case 2: O << "eapsr"; return; @@ -662,12 +670,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: assert(0); + default: llvm_unreachable("Unexpected mask value!"); case 4: O << "g"; return; case 8: O << "nzcvq"; return; case 12: O << "nzcvqg"; return; } - llvm_unreachable("Unexpected mask value!"); } if (SpecRegRBit) @@ -687,7 +694,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - if (CC != ARMCC::AL) + // Handle the undefined 15 CC value here for printing so we don't abort(). + if ((unsigned)CC == 15) + O << "<und>"; + else if (CC != ARMCC::AL) O << ARMCondCodeToString(CC); } @@ -885,6 +895,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); + if (!MO1.isReg()) { // For label symbolic references. + printOperand(MI, OpNum, O); + return; + } + O << "[" << getRegisterName(MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm() / 4; @@ -990,6 +1005,16 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 16 - MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 32 - MI->getOperand(OpNum).getImm(); +} + void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; @@ -1009,6 +1034,23 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; } +void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + +void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1046,6 +1088,29 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; } +void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + 
// Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; +} + void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with @@ -1055,3 +1120,58 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; } +void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; +} + +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 05db2d2..1037161 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,7 +23,8 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; @@ -128,17 +129,36 @@ public: void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 9982fa6..62473b2 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -1,4 +1,4 @@ -//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -43,7 +44,7 @@ namespace ARM_AM { static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -54,7 +55,7 @@ namespace ARM_AM { static inline unsigned getShiftOpcEncoding(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -72,7 +73,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -569,7 +570,7 @@ namespace ARM_AM { } EltBits = 64; } else { - assert(false && "Unsupported NEON immediate"); + llvm_unreachable("Unsupported NEON immediate"); } return Val; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index bf1f0e8..d3a3d3a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -31,8 +32,8 @@ using namespace llvm; namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: - ARMELFObjectWriter(Triple::OSType OSType) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM, + ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} }; @@ -63,6 +64,7 @@ public: { "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -76,6 +78,8 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_bl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, @@ -100,18 +104,46 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - bool MayNeedRelaxation(const MCInst 
&Inst) const; + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + const MCSymbolRefExpr *A = Target.getSymA(); + // Some fixups to thumb function symbols need the low bit (thumb bit) + // twiddled. + if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { + if (A) { + const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + if (Asm.isThumbFunc(&Sym)) + Value |= 1; + } + } + // We must always generate a relocation for BL/BLX instructions if we have + // a symbol to reference, as the linker relies on knowing the destination + // symbol's thumb-ness to get interworking right. + if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_bl)) + IsResolved = false; + } + + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, const MCAsmLayout &Layout) const; - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - void HandleAssemblerFlag(MCAssemblerFlag Flag) { + void handleAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { default: break; case MCAF_Code16: @@ -132,11 +164,13 @@ public: static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { default: return Op; - case ARM::tBcc: return ARM::t2Bcc; + case ARM::tBcc: return ARM::t2Bcc; + case ARM::tLDRpciASM: return ARM::t2LDRpci; + case ARM::tADR: return ARM::t2ADR; } } -bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { +bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) return true; return false; @@ -146,17 +180,29 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, const MCAsmLayout &Layout) const { - // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the - // low bit being an implied zero. There's an implied +4 offset for the - // branch, so we adjust the other way here to determine what's - // encodable. - // - // Relax if the value is too big for a (signed) i8. - int64_t Offset = int64_t(Value) - 4; - return Offset > 254 || Offset < -256; + switch ((unsigned)Fixup.getKind()) { + case ARM::fixup_arm_thumb_bcc: { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; + } + case ARM::fixup_thumb_adr_pcrel_10: + case ARM::fixup_arm_thumb_cp: { + // If the immediate is negative, greater than 1020, or not a multiple + // of four, the wide version of the instruction must be used. 
+ int64_t Offset = int64_t(Value) - 4; + return Offset > 1020 || Offset < 0 || Offset & 3; + } + } + llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); } -void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { +void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); // Sanity check w/ diagnostic if we get here w/ a bogus instruction. @@ -174,7 +220,7 @@ void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { +bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0 @@ -309,6 +355,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. return 0xffffff & ((Value - 8) >> 2); @@ -399,6 +447,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. return ((Value - 4) >> 1) & 0xff; + case ARM::fixup_arm_pcrel_10_unscaled: { + Value = Value - 8; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + assert ((Value < 256) && "Out of range pc-relative fixup value!"); + return Value | (isAdd << 23); + } case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. @@ -416,8 +475,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { assert ((Value < 256) && "Out of range pc-relative fixup value!"); Value |= isAdd << 23; - // Same addressing mode as fixup_arm_pcrel_10, - // but with 16-bit halfwords swapped. + // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords + // swapped. if (Kind == ARM::fixup_t2_pcrel_10) { uint32_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -435,22 +494,21 @@ namespace { // ELF is an ELF of course... class ELFARMAsmBackend : public ARMAsmBackend { public: - Triple::OSType OSType; + uint8_t OSABI; ELFARMAsmBackend(const Target &T, const StringRef TT, - Triple::OSType _OSType) - : ARMAsmBackend(T, TT), OSType(_OSType) { } + uint8_t _OSABI) + : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS, - /*IsLittleEndian*/ true); + return createARMELFObjectWriter(OS, OSABI); } }; // FIXME: Raise this to share code between Darwin and ELF. 
-void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = 4; // FIXME: 2 for Thumb Value = adjustFixupValue(Fixup.getKind(), Value); @@ -479,7 +537,7 @@ public: Subtype); } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -504,9 +562,12 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_thumb_cb: return 2; + case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: return 3; @@ -531,7 +592,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } } -void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup.getKind(), Value); @@ -567,5 +628,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on ARM"); - return new ELFARMAsmBackend(T, TT, Triple(TT).getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ec4b6ff..06eb4e5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -67,7 +67,6 @@ namespace ARMCC { inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { - default: llvm_unreachable("Unknown condition code"); case ARMCC::EQ: return "eq"; case ARMCC::NE: return "ne"; case ARMCC::HS: return "hs"; @@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { case ARMCC::LE: return "le"; case ARMCC::AL: return "al"; } + llvm_unreachable("Unknown condition code"); } namespace ARM_PROC { @@ -185,6 +185,23 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { case S29: case D29: return 29; case S30: case D30: return 30; case S31: case D31: return 31; + + // Composite registers use the regnum of the first register in the list. 
+ case D1_D2: return 1; + case D3_D5: return 3; + case D5_D7: return 5; + case D7_D9: return 7; + case D9_D10: return 9; + case D11_D12: return 11; + case D13_D14: return 13; + case D15_D16: return 15; + case D17_D18: return 17; + case D19_D20: return 19; + case D21_D22: return 21; + case D23_D24: return 23; + case D25_D26: return 25; + case D27_D28: return 27; + case D29_D30: return 29; } } @@ -237,7 +254,6 @@ namespace ARMII { inline static const char *AddrModeToString(AddrMode addrmode) { switch (addrmode) { - default: llvm_unreachable("Unknown memory operation"); case AddrModeNone: return "AddrModeNone"; case AddrMode1: return "AddrMode1"; case AddrMode2: return "AddrMode2"; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp new file mode 100644 index 0000000..5476a46 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -0,0 +1,281 @@ +//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { + class ARMELFObjectWriter : public MCELFObjectTargetWriter { + enum { DefaultEABIVersion = 0x05000000U }; + unsigned GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; + + + public: + ARMELFObjectWriter(uint8_t OSABI); + + virtual ~ARMELFObjectWriter(); + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual unsigned getEFlags() const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + }; +} + +ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, + ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} + +ARMELFObjectWriter::~ARMELFObjectWriter() {} + +// FIXME: get the real EABI Version from the Triple. +unsigned ARMELFObjectWriter::getEFlags() const { + return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; +} + +// In ARM, _MergedGlobals and most other symbols get emitted directly. +// I.e. not as an offset to a section symbol. +// This code is an approximation of what ARM/gcc does.
+ +STATISTIC(PCRelCount, "Total number of PIC Relocations"); +STATISTIC(NonPCRelCount, "Total number of non-PIC relocations"); + +const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + bool EmitThisSym = false; + + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(Symbol.getSection()); + bool InNormalSection = true; + unsigned RelocType = 0; + RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); + + DEBUG( + const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Kind2; + Kind2 = Target.getSymB() ? Target.getSymB()->getKind() : + MCSymbolRefExpr::VK_None; + dbgs() << "considering symbol " + << Section.getSectionName() << "/" + << Symbol.getName() << "/" + << " Rel:" << (unsigned)RelocType + << " Kind: " << (int)Kind << "/" << (int)Kind2 + << " Tmp:" + << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/" + << Symbol.isVariable() << "/" << Symbol.isTemporary() + << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n"); + + if (IsPCRel) { ++PCRelCount; + switch (RelocType) { + default: + // Most relocation types are emitted as explicit symbols + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = true; + break; + case ELF::R_ARM_ABS32: + // But things get strange with R_ARM_ABS32 + // In this case, most things that go in .rodata show up + // as section relative relocations + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".rodata", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = false; + break; + } + } else { + NonPCRelCount++; + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".rodata", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + + switch (RelocType) { + default: EmitThisSym = true; break; + case ELF::R_ARM_ABS32: EmitThisSym = false; break; + } + } + + if (EmitThisSym) + return &Symbol; + if (! Symbol.isTemporary() && InNormalSection) { + return &Symbol; + } + return NULL; +} + +// Need to examine the Fixup when determining whether to +// emit the relocation as an explicit symbol or as a section relative +// offset +unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return GetRelocTypeInner(Target, Fixup, IsPCRel); +} + +unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + unsigned Type = 0; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("Unimplemented"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_REL32; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + llvm_unreachable("unimplemented"); + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + } + break; + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: + case ARM::fixup_arm_uncondbranch: + switch (Modifier) { + case MCSymbolRefExpr::VK_ARM_PLT: + Type = ELF::R_ARM_PLT32; + break; + default: + Type = ELF::R_ARM_CALL; + break; + } + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + Type = ELF::R_ARM_MOVT_PREL; + break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + Type = ELF::R_ARM_MOVW_PREL_NC; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Type = ELF::R_ARM_THM_MOVT_PREL; + break; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Type = ELF::R_ARM_THM_MOVW_PREL_NC; + break; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + Type = ELF::R_ARM_THM_CALL; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_GOT: + Type = ELF::R_ARM_GOT_BREL; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + Type = ELF::R_ARM_TLS_GD32; + break; + case MCSymbolRefExpr::VK_ARM_TPOFF: + Type = ELF::R_ARM_TLS_LE32; + break; + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS32; + break; + case MCSymbolRefExpr::VK_ARM_GOTOFF: + Type = ELF::R_ARM_GOTOFF32; + break; + case MCSymbolRefExpr::VK_ARM_TARGET1: + Type = ELF::R_ARM_TARGET1; + break; + } + break; + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_arm_thumb_br: + llvm_unreachable("Unimplemented"); + case ARM::fixup_arm_uncondbranch: + Type = ELF::R_ARM_CALL; + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + Type = ELF::R_ARM_MOVT_ABS; + break; + case ARM::fixup_arm_movw_lo16: + Type = ELF::R_ARM_MOVW_ABS_NC; + break; + case ARM::fixup_t2_movt_hi16: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case ARM::fixup_t2_movw_lo16: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 350c92d..1827986 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -1,4 +1,4 @@ -//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===// +//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ 
-23,6 +23,9 @@ enum Fixups { // the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, + // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol + // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + fixup_arm_pcrel_10_unscaled, // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses // used in VFP instructions where the lower 2 bits are not encoded // (so it's encoded as an 8-bit immediate). @@ -56,6 +59,12 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, + // fixup_arm_bl - Fixup for ARM BL instructions. + fixup_arm_bl, + + // fixup_arm_blx - Fixup for ARM BLX instructions. + fixup_arm_blx, + // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. fixup_arm_thumb_bl, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1c109e0..03e8d5f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===// +//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = { 0,0 }; +void ARMMCAsmInfoDarwin::anchor() { } + ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; @@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { ExceptionsType = ExceptionHandling::SjLj; } +void ARMELFMCAsmInfo::anchor() { } + ARMELFMCAsmInfo::ARMELFMCAsmInfo() { // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 90f7822..f0b289c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====// +//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,15 @@ namespace llvm { - struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit ARMMCAsmInfoDarwin(); }; - struct ARMELFMCAsmInfo : public MCAsmInfo { + class ARMELFMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit ARMELFMCAsmInfo(); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index c38a882..4445dcd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -64,7 +64,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; /// getMachineOpValue - Return binary encoding of operand. If the machine @@ -118,8 +118,10 @@ public: /// branch target. 
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. @@ -166,7 +168,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm(); switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::da: return 0; case ARM_AM::ia: return 1; case ARM_AM::db: return 2; @@ -177,7 +179,6 @@ public: /// unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { switch (ShOpc) { - default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::no_shift: case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -185,7 +186,7 @@ public: case ARM_AM::ror: case ARM_AM::rrx: return 3; } - return 0; + llvm_unreachable("Invalid ShiftOpc!"); } /// getAddrMode2OpValue - Return encoding for addrmode2 operands. @@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } llvm_unreachable("Unable to encode MCOperand!"); - return 0; } /// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand. @@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected branch target type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); // All of the information is in the fixup. return 0; @@ -594,16 +594,21 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: +getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_bl, Fixups); + + return MO.getImm() >> 2; +} + +uint32_t ARMMCCodeEmitter:: getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); - if (MO.isExpr()) { - if (HasConditionalBranch(MI)) - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); - } + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups); return MO.getImm() >> 1; } @@ -718,12 +723,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { Reg = ARM::PC; int32_t Offset = MO.getImm(); + // FIXME: Handle #-0. 
if (Offset < 0) { Offset *= -1; isAdd = false; } @@ -791,8 +797,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else @@ -833,7 +839,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // but this is good enough for now. static bool EvaluateAsPCRel(const MCExpr *Expr) { switch (Expr->getKind()) { - default: assert(0 && "Unexpected expression type"); + default: llvm_unreachable("Unexpected expression type"); case MCExpr::SymbolRef: return false; case MCExpr::Binary: return true; } @@ -857,7 +863,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, MCFixupKind Kind; switch (ARM16Expr->getKind()) { - default: assert(0 && "Unsupported ARMFixup"); + default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: if (!isTargetDarwin() && EvaluateAsPCRel(E)) Kind = MCFixupKind(isThumb2() @@ -879,12 +885,11 @@ : ARM::fixup_arm_movw_lo16); break; } - Fixups.push_back(MCFixup::Create(0, E, Kind)); + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; }; llvm_unreachable("Unsupported MCExpr type in MCOperand!"); - return 0; } uint32_t ARMMCCodeEmitter:: @@ -993,6 +998,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); + + // If the first operand isn't a register, we have a label reference. + if (!MO.isReg()) { + unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + return (Rn << 9) | (1 << 13); + } unsigned Rn = getARMRegisterNumbering(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; @@ -1066,7 +1084,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { @@ -1372,11 +1390,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, switch (Imm.getImm()) { default: break; - case 2: - case 4: case 8: - case 16: Align = 0x00; break; - case 32: Align = 0x03; break; + case 16: + case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break; + case 4: Align = 0x03; break; } return RegNo | (Align << 4); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2727ba8..22e14a2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, void ARMMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { - default: assert(0 && "Invalid kind!"); + default: llvm_unreachable("Invalid kind!"); case VK_ARM_HI16: OS << ":upper16:"; break; case VK_ARM_LO16: OS << ":lower16:"; break; } @@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { switch (Value->getKind()) { case MCExpr::Target: - assert(0 && "Can't handle nested target expr!"); - break; + llvm_unreachable("Can't handle nested target expr!"); case MCExpr::Constant: break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 0a2e883..a727e08 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -1,4 +1,4 @@ -//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===// +//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e86f48e..1606b92 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===// +//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -155,7 +155,6 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); - return NULL; } return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); @@ -164,9 +163,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, STI); + return new ARMInstPrinter(MAI, MRI, STI); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 9b3d3bd..88472d7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); +/// createARMELFObjectWriter - Construct an ARM ELF object writer. +MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + /// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index f394b4f..faf73ac 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" @@ -81,6 +82,8 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_bl: + case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); @@ -136,7 +139,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -149,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. @@ -240,7 +245,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -252,7 +258,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. @@ -294,10 +301,13 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; unsigned RelocType = macho::RIT_Vanilla; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { - report_fatal_error("unknown ARM fixup kind!"); - return; - } + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) + // If we failed to get fixup kind info, it's because there's no legal + // relocation type for the fixup kind. This happens when it's a fixup that's + // expected to always be resolvable at assembly time and not have any + // relocations needed. + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation on symbol"); // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. 
Differences always require scattered diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index f2cf78a..2565994 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,10 +1,11 @@ add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp + ARMELFObjectWriter.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp ARMMCTargetDesc.cpp ARMMachObjectWriter.cpp ) add_dependencies(LLVMARMDesc ARMCommonTableGen) diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 000a37f..2899836 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -1,4 +1,4 @@ -//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=// +//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 2f6842e..4fcaecf 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -699,3 +699,19 @@ test is equality test so it's more a conditional move rather than a select: Currently this is a ARM specific dag combine. We probably should make it into a target-neutral one. + +//===---------------------------------------------------------------------===// + +Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins +are specified to be undefined at zero, so portable code must check for zero +and handle it as a special case. That is unnecessary on ARM where those +operations are implemented in a way that is well-defined for zero. For +example: + +int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; } + +should just be implemented with a CLZ instruction. Since there are other +targets, e.g., PPC, that share this behavior, it would be best to implement +this in a target-independent way: we should probably fold that (when using +"undefined at zero" semantics) to set the "defined at zero" bit and have +the code generator expand out the right code.
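A quick check of the README claim above, as plain C++ rather than part of the patch: the guarded form is what portable sources write, and the values in the comment are what any fold to a zero-defined CLZ must preserve (assuming a 32-bit int; nothing below is LLVM code).

#include <cstdio>

// Guarded form from the README entry above. The guard exists only because
// __builtin_clz(0) is undefined in general; on ARM the underlying CLZ
// instruction already returns 32 for a zero input.
int f(int x) { return x ? __builtin_clz(x) : sizeof(int) * 8; }

int main() {
  std::printf("%d %d\n", f(1), f(0)); // prints "31 32" with a 32-bit int
  return 0;
}
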
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index d848177..a89a663 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1,4 +1,4 @@ -//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====// +//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -101,7 +101,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -175,14 +175,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; return false; } -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -214,7 +214,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { @@ -278,8 +278,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill)); + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB->copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -350,6 +353,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + MIB->copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index de33bd6..adaccdd 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===// +//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/ADT/SmallVector.h" -#include "Thumb1InstrInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; @@ -27,6 +27,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tMOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 17ef2f7..4d97626 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===// +//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -27,6 +27,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index e61c0a7..6b8bf0e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===// +//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" @@ -570,6 +571,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) { + UseMI = II; + done = true; + break; + } if (!MO.isReg() || MO.isUndef() || !MO.getReg() || TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -624,6 +630,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, FrameReg = BasePtr; } + // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the + // call frame setup/destroy instructions have already been eliminated. That + // means the stack pointer cannot be used to access the emergency spill slot + // when !hasReservedCallFrame(). +#ifndef NDEBUG + if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + "Cannot use SP to access the emergency spill slot in " + "functions without a reserved call frame"); + assert(!MF.getFrameInfo()->hasVarSizedObjects() && + "Cannot use SP to access the emergency spill slot in " + "functions with variable sized frame objects"); + } +#endif // NDEBUG + // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); @@ -694,7 +715,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register. 
The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); } else { - assert(false && "Unexpected opcode!"); + llvm_unreachable("Unexpected opcode!"); } // Add predicate back if it's needed. diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 55b4d30..def75dd 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===// +//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===// // // The LLVM Compiler Infrastructure // @@ -76,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { unsigned Reg = LocalUses[i]; Uses.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Uses.insert(*Subreg); } @@ -84,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; Defs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Defs.insert(*Subreg); if (Reg == ARM::CPSR) @@ -239,7 +239,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle. - FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI); + MachineBasicBlock::instr_iterator LI = LastITMI; + finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI)); Modified = true; ++NumITs; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 7ec3c0e..6cb182a 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===// +//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -34,6 +35,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tNOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { // FIXME return 0; @@ -586,7 +594,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || - NMI->definesRegister(ARM::CPSR)) + NMI->modifiesRegister(ARM::CPSR, &TRI)) break; if (++NumInsts == 4) // Too many in a row! 
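The definesRegister-to-modifiesRegister change in scheduleTwoAddrSource above is subtle: the old check only matched an explicit def of exactly CPSR, while the new, TRI-aware one also catches writes to anything that aliases it. A minimal standalone sketch of that distinction, with plain structs standing in for MachineInstr and the register-alias table (none of these names are LLVM APIs, and the CPSR/APSR pair is just an illustrative alias):

#include <cassert>
#include <set>
#include <utility>
#include <vector>

typedef unsigned Reg;

struct Instr {
  std::set<Reg> Defs; // registers this instruction explicitly writes
};

// Returns true if A and B name the same register or appear as an alias pair.
static bool regsAlias(Reg A, Reg B,
                      const std::vector<std::pair<Reg, Reg> > &Aliases) {
  if (A == B) return true;
  for (unsigned i = 0, e = Aliases.size(); i != e; ++i)
    if ((Aliases[i].first == A && Aliases[i].second == B) ||
        (Aliases[i].first == B && Aliases[i].second == A))
      return true;
  return false;
}

// Old-style check: an explicit def of exactly R.
static bool definesReg(const Instr &MI, Reg R) { return MI.Defs.count(R) != 0; }

// New-style check: a def of R or of any register aliasing R.
static bool modifiesReg(const Instr &MI, Reg R,
                        const std::vector<std::pair<Reg, Reg> > &Aliases) {
  for (std::set<Reg>::const_iterator I = MI.Defs.begin(), E = MI.Defs.end();
       I != E; ++I)
    if (regsAlias(*I, R, Aliases))
      return true;
  return false;
}

int main() {
  enum { CPSR = 1, APSR = 2 }; // hypothetical alias pair for illustration
  std::vector<std::pair<Reg, Reg> > Aliases;
  Aliases.push_back(std::make_pair((Reg)CPSR, (Reg)APSR));
  Instr MI;
  MI.Defs.insert(APSR);                   // writes an alias of CPSR
  assert(!definesReg(MI, CPSR));          // exact-match check misses it
  assert(modifiesReg(MI, CPSR, Aliases)); // alias-aware check catches it
  return 0;
}
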
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index f2637d7..a754649 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===// +//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,6 +28,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 355c3bf..6d210fe 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===// +//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e206288..5ee5f42 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -597,7 +597,24 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); - if (Reg0 != Reg1) { + // t2MUL is "special". The tied source operand is second, not first. + if (MI->getOpcode() == ARM::t2MUL) { + unsigned Reg2 = MI->getOperand(2).getReg(); + // Early exit if the regs aren't all low regs. + if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) + || !isARMLowRegister(Reg2)) + return false; + if (Reg0 != Reg2) { + // If the other operand also isn't the same as the destination, we + // can't reduce. + if (Reg1 != Reg0) + return false; + // Try to commute the operands to make it a 2-address instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(MI); + if (!CommutedMI) + return false; + } + } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || @@ -880,14 +897,17 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } ProcessNext: - if (LiveCPSR && - NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() && - BundleMI->killsRegister(ARM::CPSR)) + if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill // marker is only on the BUNDLE instruction. Process the BUNDLE // instruction as we finish with the bundled instruction to work around // the inconsistency. 
- LiveCPSR = false; + if (BundleMI->killsRegister(ARM::CPSR)) + LiveCPSR = false; + MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); + if (MO && !MO->isDead()) + LiveCPSR = true; + } bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 8bce52c..80973b7 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -215,6 +215,8 @@ namespace { bool printConstExprCast(const ConstantExpr *CE, bool Static); void printConstantArray(ConstantArray *CPA, bool Static); void printConstantVector(ConstantVector *CV, bool Static); + void printConstantDataSequential(ConstantDataSequential *CDS, bool Static); + /// isAddressExposed - Return true if the specified value's name needs to /// have its address taken in order to get a C value of the correct type. @@ -288,9 +290,6 @@ namespace { void visitInvokeInst(InvokeInst &I) { llvm_unreachable("Lowerinvoke pass didn't work!"); } - void visitUnwindInst(UnwindInst &I) { - llvm_unreachable("Lowerinvoke pass didn't work!"); - } void visitResumeInst(ResumeInst &I) { llvm_unreachable("DwarfEHPrepare pass didn't work!"); } @@ -553,33 +552,44 @@ raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, default: llvm_unreachable("Unhandled case in getTypeProps!"); } - - return Out; } void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { + Out << "{ "; + printConstant(cast<Constant>(CPA->getOperand(0)), Static); + for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(cast<Constant>(CPA->getOperand(i)), Static); + } + Out << " }"; +} + +void CWriter::printConstantVector(ConstantVector *CP, bool Static) { + Out << "{ "; + printConstant(cast<Constant>(CP->getOperand(0)), Static); + for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { + Out << ", "; + printConstant(cast<Constant>(CP->getOperand(i)), Static); + } + Out << " }"; +} +void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, + bool Static) { // As a special case, print the array as a string if it is an array of // ubytes or an array of sbytes with positive values. // - Type *ETy = CPA->getType()->getElementType(); - bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) || - ETy == Type::getInt8Ty(CPA->getContext())); - - // Make sure the last character is a null char, as automatically added by C - if (isString && (CPA->getNumOperands() == 0 || - !cast<Constant>(*(CPA->op_end()-1))->isNullValue())) - isString = false; - - if (isString) { + if (CDS->isCString()) { Out << '\"'; // Keep track of whether the last number was a hexadecimal escape. bool LastWasHex = false; - + + StringRef Bytes = CDS->getAsCString(); + // Do not include the last character, which we know is null - for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) { - unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue(); - + for (unsigned i = 0, e = Bytes.size(); i != e; ++i) { + unsigned char C = Bytes[i]; + // Print it out literally if it is a printable character. 
The only thing // to be careful about is when the last letter output was a hex escape // code, in which case we have to be careful not to print out hex digits @@ -595,49 +605,34 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { } else { LastWasHex = false; switch (C) { - case '\n': Out << "\\n"; break; - case '\t': Out << "\\t"; break; - case '\r': Out << "\\r"; break; - case '\v': Out << "\\v"; break; - case '\a': Out << "\\a"; break; - case '\"': Out << "\\\""; break; - case '\'': Out << "\\\'"; break; - default: - Out << "\\x"; - Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); - Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - LastWasHex = true; - break; + case '\n': Out << "\\n"; break; + case '\t': Out << "\\t"; break; + case '\r': Out << "\\r"; break; + case '\v': Out << "\\v"; break; + case '\a': Out << "\\a"; break; + case '\"': Out << "\\\""; break; + case '\'': Out << "\\\'"; break; + default: + Out << "\\x"; + Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); + Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + LastWasHex = true; + break; } } } Out << '\"'; } else { - Out << '{'; - if (CPA->getNumOperands()) { - Out << ' '; - printConstant(cast<Constant>(CPA->getOperand(0)), Static); - for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CPA->getOperand(i)), Static); - } + Out << "{ "; + printConstant(CDS->getElementAsConstant(0), Static); + for (unsigned i = 1, e = CDS->getNumElements(); i != e; ++i) { + Out << ", "; + printConstant(CDS->getElementAsConstant(i), Static); } Out << " }"; } } -void CWriter::printConstantVector(ConstantVector *CP, bool Static) { - Out << '{'; - if (CP->getNumOperands()) { - Out << ' '; - printConstant(cast<Constant>(CP->getOperand(0)), Static); - for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CP->getOperand(i)), Static); - } - } - Out << " }"; -} // isFPCSafeToPrint - Returns true if we may assume that CFP may be written out // textually as a double (rather than as a reference to a stack-allocated @@ -743,7 +738,6 @@ void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { break; // These don't need a source cast. default: llvm_unreachable("Invalid cast opcode"); - break; } } @@ -1027,6 +1021,9 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << "{ "; // Arrays are wrapped in struct types. if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) { printConstantArray(CA, Static); + } else if (ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CPV)) { + printConstantDataSequential(CDS, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); ArrayType *AT = cast<ArrayType>(CPV->getType()); @@ -1054,6 +1051,9 @@ void CWriter::printConstant(Constant *CPV, bool Static) { } if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) { printConstantVector(CV, Static); + } else if (ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CPV)) { + printConstantDataSequential(CDS, Static); } else { assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); VectorType *VT = cast<VectorType>(CPV->getType()); @@ -2394,9 +2394,9 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { unsigned NumCases = SI.getNumCases(); // Skip the first item since that's the default case. 
- for (unsigned i = 1; i < NumCases; ++i) { + for (unsigned i = 0; i < NumCases; ++i) { ConstantInt* CaseVal = SI.getCaseValue(i); - BasicBlock* Succ = SI.getSuccessor(i); + BasicBlock* Succ = SI.getCaseSuccessor(i); Out << " case "; writeOperand(CaseVal); Out << ":\n"; diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 22d8c76..d8bc743 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget TargetELFWriterInfo.cpp TargetInstrInfo.cpp TargetIntrinsicInfo.cpp + TargetJITInfo.cpp TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 6c67c2d..cf4f796 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(CellSPUCodeGen SPUISelDAGToDAG.cpp SPUISelLowering.cpp SPUFrameLowering.cpp + SPUMachineFunction.cpp SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td index 9468aee..cdb4099 100644 --- a/lib/Target/CellSPU/CellSDKIntrinsics.td +++ b/lib/Target/CellSPU/CellSDKIntrinsics.td @@ -1,5 +1,5 @@ //===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp index 8c1176a..4bad37e 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp @@ -14,6 +14,8 @@ #include "SPUMCAsmInfo.h" using namespace llvm; +void SPULinuxMCAsmInfo::anchor() { } + SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h index 7f850d3..f786147 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h @@ -20,7 +20,9 @@ namespace llvm { class Target; - struct SPULinuxMCAsmInfo : public MCAsmInfo { + class SPULinuxMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); }; } // namespace llvm diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp index 5ce14c9..8450e2c 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===// +//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===// // // The LLVM Compiler Infrastructure // @@ -18,6 +18,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index b51fbc7..c660131 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -1,4 +1,4 @@ -//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==// +//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPU.td 
b/lib/Target/CellSPU/SPU.td index 8327fe0..e835b9c 100644 --- a/lib/Target/CellSPU/SPU.td +++ b/lib/Target/CellSPU/SPU.td @@ -1,5 +1,5 @@ -//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===// -// +//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td index 3031fda..e051e04 100644 --- a/lib/Target/CellSPU/SPU128InstrInfo.td +++ b/lib/Target/CellSPU/SPU128InstrInfo.td @@ -1,9 +1,9 @@ -//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===// +//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===// // // Cell SPU 128-bit operations // //===----------------------------------------------------------------------===// - + // zext 32->128: Zero extend 32-bit to 128-bit def : Pat<(i128 (zext R32C:$rSrc)), (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index f340edf..bea33b5 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -1,4 +1,4 @@ -//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====// +//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===// // // Cell SPU 64-bit operations // diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 90b5270..14021fe 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=// +//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===// // // The LLVM Compiler Infrastructure // @@ -248,7 +248,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { switch (MO.getType()) { case MachineOperand::MO_Immediate: report_fatal_error("printOp() does not handle immediate values"); - return; case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 04fa2ae..9f9692b 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -1,10 +1,10 @@ //===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This describes the calling conventions for the STI Cell SPU architecture. 
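A pattern that recurs through the rest of this section: report_fatal_error and llvm_unreachable are annotated noreturn, so the `return;`, `break;`, and fallback `return SDValue();` statements that used to follow them were dead code, and these hunks simply delete them (as in printOp above, and the getRC cleanup that follows). A sketch of why a fully covered switch then needs no trailing return; regClassName is a hypothetical stand-in, not SPU code:

#include "llvm/Support/ErrorHandling.h"

static const char *regClassName(unsigned Kind) {
  switch (Kind) {
  case 0: return "R32C";
  case 1: return "R64C";
  // Every path either returns or aborts, so nothing is reachable after
  // the switch and the compiler raises no missing-return warning.
  default: llvm_unreachable("add a new case here");
  }
}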
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h index b837f2cf..11c5281 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.h +++ b/lib/Target/CellSPU/SPUFrameLowering.h @@ -1,4 +1,4 @@ -//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========// +//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index a851be3..c27caea 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -90,8 +90,6 @@ namespace { short s_val = (short) i_val; return i_val == s_val; } - - return false; } //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. @@ -286,8 +284,8 @@ namespace { llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled."); #else SelectAddrIdxOnly(Op, Op, Op0, Op1); -#endif break; +#endif } OutOps.push_back(Op0); @@ -326,7 +324,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue(); Base = CurDAG->getTargetConstant( val , MVT::i32); Index = Zero; - return true; break; + return true; case ISD::ConstantPool: case ISD::GlobalAddress: report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); @@ -578,22 +576,16 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) { switch( VT.SimpleTy ) { case MVT::i8: return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); - break; case MVT::i16: return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); - break; case MVT::i32: return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); - break; case MVT::f32: return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); - break; case MVT::i64: return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); - break; case MVT::i128: return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32); - break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: @@ -601,11 +593,10 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) { case MVT::v2i64: case MVT::v2f64: return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); - break; default: assert( false && "add a new case here" ); + return SDValue(); } - return SDValue(); } //! 
Convert the operand from a target-independent to a target-specific node diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index dc0d5a6..3d2b32d 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -84,8 +83,9 @@ namespace { Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; @@ -1039,7 +1039,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { llvm_unreachable("LowerConstantPool: Relocation model other than static" " not supported."); - return SDValue(); } //! Alternate entry point for generating the address of a constant pool entry @@ -1070,7 +1069,6 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { llvm_unreachable("LowerJumpTable: Relocation model other than static" " not supported."); - return SDValue(); } static SDValue @@ -1098,8 +1096,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { "not supported."); /*NOTREACHED*/ } - - return SDValue(); } //! Custom lower double precision floating point constants @@ -1279,7 +1275,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { SDValue SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -1697,7 +1693,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(Value32, MVT::i32); return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); - break; } case MVT::v2f64: { uint64_t f64val = uint64_t(SplatBits); @@ -1707,7 +1702,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(f64val, MVT::i64); return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); - break; } case MVT::v16i8: { // 8-bit constants have to be expanded to 16-bits @@ -1734,8 +1728,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); } } - - return SDValue(); } /*! 
@@ -2009,8 +2001,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); } } - - return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { @@ -2044,8 +2034,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { int elt_byte = EltNo * VT.getSizeInBits() / 8; switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Invalid value type!"); + default: llvm_unreachable("Invalid value type!"); case MVT::i8: { prefslot_begin = prefslot_end = 3; break; @@ -2223,8 +2212,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, switch (Opc) { default: llvm_unreachable("Unhandled i8 math operator"); - /*NOTREACHED*/ - break; case ISD::ADD: { // 8-bit addition: Promote the arguments up to 16-bits and truncate // the result: @@ -2309,11 +2296,8 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, DAG.getNode(Opc, dl, MVT::i16, N0, N1)); - break; } } - - return SDValue(); } //! Lower byte immediate operations for v16i8 vectors: @@ -2378,8 +2362,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Invalid value type!"); + default: llvm_unreachable("Invalid value type!"); case MVT::i8: { SDValue N = Op.getOperand(0); SDValue Elt0 = DAG.getConstant(0, MVT::i32); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index aa4a168..e28e2a4 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -162,7 +162,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h index 5e268f8..b495537 100644 --- a/lib/Target/CellSPU/SPUInstrBuilder.h +++ b/lib/Target/CellSPU/SPUInstrBuilder.h @@ -1,4 +1,4 @@ -//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==// +//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index bdbe255..cd3f422 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -1,10 +1,10 @@ -//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===// -// +//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 007bc0e..759923d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===// +//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index bc1ba71..f0d21ad 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -1,4 +1,4 @@ -//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===// +//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp new file mode 100644 index 0000000..3e948d0 --- /dev/null +++ b/lib/Target/CellSPU/SPUMachineFunction.cpp @@ -0,0 +1,14 @@ +//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SPUMachineFunction.h" + +using namespace llvm; + +void SPUFunctionInfo::anchor() { } diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h index 3ef3ccb..399684b 100644 --- a/lib/Target/CellSPU/SPUMachineFunction.h +++ b/lib/Target/CellSPU/SPUMachineFunction.h @@ -21,7 +21,8 @@ namespace llvm { /// SPUFunctionInfo - Cell SPU target-specific information for each /// MachineFunction class SPUFunctionInfo : public MachineFunctionInfo { -private: + virtual void anchor(); + /// UsesLR - Indicates whether LR is used in the current function. 
/// bool UsesLR; diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td index ed7129e..9a5c397 100644 --- a/lib/Target/CellSPU/SPUMathInstr.td +++ b/lib/Target/CellSPU/SPUMathInstr.td @@ -1,4 +1,4 @@ -//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======// +//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===// // // Cell SPU math operations // diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index a6e621f..a47e9ef 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -1,4 +1,4 @@ -//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===// +//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp index e2bd2d7f..7c58041 100644 --- a/lib/Target/CellSPU/SPUNopFiller.cpp +++ b/lib/Target/CellSPU/SPUNopFiller.cpp @@ -1,4 +1,4 @@ -//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===// +//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td index 96cde51..6f8deef 100644 --- a/lib/Target/CellSPU/SPUOperands.td +++ b/lib/Target/CellSPU/SPUOperands.td @@ -1,10 +1,10 @@ -//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===// -// +//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // Cell SPU Instruction Operands: //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index bbac6fd..92983e1 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===// +//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===// // // The LLVM Compiler Infrastructure // @@ -197,11 +197,11 @@ SPURegisterInfo::getPointerRegClass(unsigned Kind) const { return &SPU::R32CRegClass; } -const unsigned * +const uint16_t * SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // Cell ABI calling convention - static const unsigned SPU_CalleeSaveRegs[] = { + static const uint16_t SPU_CalleeSaveRegs[] = { SPU::R80, SPU::R81, SPU::R82, SPU::R83, SPU::R84, SPU::R85, SPU::R86, SPU::R87, SPU::R88, SPU::R89, SPU::R90, SPU::R91, diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index b7818a4..e5ab224 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -1,4 +1,4 @@ -//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==// +//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -57,7 +57,7 @@ namespace llvm { } //! 
Return the array of callee-saved registers - virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; + virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; //! Allow for scavenging, so we can get scratch registers when needed. virtual bool requiresRegisterScavenging(const MachineFunction &MF) const diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td index e16f51f..f27b042 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.td +++ b/lib/Target/CellSPU/SPURegisterInfo.td @@ -1,10 +1,10 @@ -//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===// -// +//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index 9cd3c23..9ccd084 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -1,10 +1,10 @@ -//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===// -// +//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index 43335ab..ebfefe2 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -1,4 +1,4 @@ -//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===// +//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 1e922a4..e43f5ad 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -14,14 +14,13 @@ #include "SPU.h" #include "SPUTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -extern "C" void LLVMInitializeCellSPUTarget() { +extern "C" void LLVMInitializeCellSPUTarget() { // Register the target. RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget); } @@ -51,15 +50,34 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool SPUTargetMachine::addInstSelector(PassManagerBase &PM) { +namespace { +/// SPU Code Generator Pass Configuration Options. 
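The class begun below is part of a wider migration: pass-pipeline hooks such as addInstSelector and addPreEmitPass move off TargetMachine onto a per-target TargetPassConfig subclass, and the machine keeps only a createPassConfig factory. A stripped-down sketch of the shape, assuming the 3.1-era llvm/CodeGen/Passes.h declarations; ExamplePassConfig and createMyISelDag are placeholders, not SPU code:

#include "llvm/CodeGen/Passes.h"   // declares TargetPassConfig in this era
#include "llvm/PassManager.h"
using namespace llvm;

namespace {
class ExamplePassConfig : public TargetPassConfig {
public:
  ExamplePassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}
  // A real target would do e.g. PM.add(createMyISelDag(getTM<...>()));
  virtual bool addInstSelector() { return false; }
  virtual bool addPreEmitPass()  { return false; }
};
} // end anonymous namespace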
+class SPUPassConfig : public TargetPassConfig { +public: + SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SPUTargetMachine &getSPUTargetMachine() const { + return getTM<SPUTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SPUPassConfig(this, PM); +} + +bool SPUPassConfig::addInstSelector() { // Install an instruction selector. - PM.add(createSPUISelDag(*this)); + PM.add(createSPUISelDag(getSPUTargetMachine())); return false; } // passes to run just before printing the assembly -bool SPUTargetMachine:: -addPreEmitPass(PassManagerBase &PM) { +bool SPUPassConfig::addPreEmitPass() { // load the TCE instruction scheduler, if available via // loaded plugins typedef llvm::FunctionPass* (*BuilderFunc)(const char*); @@ -70,6 +88,6 @@ addPreEmitPass(PassManagerBase &PM) { PM.add(schedulerCreator("cellspu")); //align instructions with nops/lnops for dual issue - PM.add(createSPUNopFillerPass(*this)); + PM.add(createSPUNopFillerPass(getSPUTargetMachine())); return true; } diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 0841fee..c179292 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -1,4 +1,4 @@ -//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=// +//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -61,7 +61,7 @@ public: return NULL; } - virtual const SPUTargetLowering *getTargetLowering() const { + virtual const SPUTargetLowering *getTargetLowering() const { return &TLInfo; } @@ -72,7 +72,7 @@ public: virtual const SPURegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - + virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -80,10 +80,9 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - + // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; } // end namespace llvm diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index efeb989..76b5e9c 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -189,7 +189,6 @@ static std::string getTypePrefix(Type *Ty) { case Type::VectorTyID: return "packed_"; default: return "other_"; } - return "unknown_"; } void CppWriter::error(const std::string& msg) { @@ -301,7 +300,6 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { switch (VisType) { - default: llvm_unreachable("Unknown GVar visibility"); case GlobalValue::DefaultVisibility: Out << "GlobalValue::DefaultVisibility"; break; @@ -443,7 +441,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { unsigned index = PAL.getSlot(i).Index; Attributes attrs = PAL.getSlot(i).Attrs; - Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; + Out << "PAWI.Index = " << index << "U; PAWI.Attrs = Attribute::None "; #define HANDLE_ATTR(X) \ if (attrs & Attribute::X) \ Out << " | Attribute::" #X; \ @@ -678,11 +676,6 @@ void CppWriter::printConstant(const Constant 
*CV) { std::string constName(getCppName(CV)); std::string typeName(getCppName(CV->getType())); - if (isa<GlobalValue>(CV)) { - // Skip variables and functions, we emit them elsewhere - return; - } - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { std::string constValue = CI->getValue().toString(10, true); Out << "ConstantInt* " << constName @@ -700,38 +693,17 @@ void CppWriter::printConstant(const Constant *CV) { printCFP(CFP); Out << ";"; } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { - if (CA->isString() && - CA->getType()->getElementType() == - Type::getInt8Ty(CA->getContext())) { - Out << "Constant* " << constName << - " = ConstantArray::get(mod->getContext(), \""; - std::string tmp = CA->getAsString(); - bool nullTerminate = false; - if (tmp[tmp.length()-1] == 0) { - tmp.erase(tmp.length()-1); - nullTerminate = true; - } - printEscapedString(tmp); - // Determine if we want null termination or not. - if (nullTerminate) - Out << "\", true"; // Indicate that the null terminator should be - // added. - else - Out << "\", false";// No null terminator - Out << ");"; - } else { - Out << "std::vector<Constant*> " << constName << "_elems;"; + Out << "std::vector<Constant*> " << constName << "_elems;"; + nl(Out); + unsigned N = CA->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CA->getOperand(i)); // recurse to print operands + Out << constName << "_elems.push_back(" + << getCppName(CA->getOperand(i)) << ");"; nl(Out); - unsigned N = CA->getNumOperands(); - for (unsigned i = 0; i < N; ++i) { - printConstant(CA->getOperand(i)); // recurse to print operands - Out << constName << "_elems.push_back(" - << getCppName(CA->getOperand(i)) << ");"; - nl(Out); - } - Out << "Constant* " << constName << " = ConstantArray::get(" - << typeName << ", " << constName << "_elems);"; } + Out << "Constant* " << constName << " = ConstantArray::get(" + << typeName << ", " << constName << "_elems);"; } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) { Out << "std::vector<Constant*> " << constName << "_fields;"; nl(Out); @@ -744,14 +716,14 @@ void CppWriter::printConstant(const Constant *CV) { } Out << "Constant* " << constName << " = ConstantStruct::get(" << typeName << ", " << constName << "_fields);"; - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + } else if (const ConstantVector *CVec = dyn_cast<ConstantVector>(CV)) { Out << "std::vector<Constant*> " << constName << "_elems;"; nl(Out); - unsigned N = CP->getNumOperands(); + unsigned N = CVec->getNumOperands(); for (unsigned i = 0; i < N; ++i) { - printConstant(CP->getOperand(i)); + printConstant(CVec->getOperand(i)); Out << constName << "_elems.push_back(" - << getCppName(CP->getOperand(i)) << ");"; + << getCppName(CVec->getOperand(i)) << ");"; nl(Out); } Out << "Constant* " << constName << " = ConstantVector::get(" @@ -759,6 +731,41 @@ void CppWriter::printConstant(const Constant *CV) { } else if (isa<UndefValue>(CV)) { Out << "UndefValue* " << constName << " = UndefValue::get(" << typeName << ");"; + } else if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(CV)) { + if (CDS->isString()) { + Out << "Constant *" << constName << + " = ConstantDataArray::getString(mod->getContext(), \""; + StringRef Str = CDS->getAsString(); + bool nullTerminate = false; + if (Str.back() == 0) { + Str = Str.drop_back(); + nullTerminate = true; + } + printEscapedString(Str); + // Determine if we want null termination or not. 
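The branch resumed below decides the AddNull flag for ConstantDataArray::getString: if the constant data already stores a trailing NUL, it is stripped from the payload and re-added by getString, keeping the emitted C++ faithful to the original array length. A compact sketch of that logic under the same API; rebuildString is a hypothetical helper, and the empty() guard is an extra precaution not present in the hunk:

#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

static Constant *rebuildString(LLVMContext &Ctx,
                               const ConstantDataSequential *CDS) {
  StringRef Str = CDS->getAsString();
  bool AddNull = false;
  if (!Str.empty() && Str.back() == 0) { // data carries its own terminator
    Str = Str.drop_back();               // drop it from the payload...
    AddNull = true;                      // ...and let getString re-append it
  }
  return ConstantDataArray::getString(Ctx, Str, AddNull);
}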
+ if (nullTerminate) + Out << "\", true);"; + else + Out << "\", false);";// No null terminator + } else { + // TODO: Could generate more efficient code generating CDS calls instead. + Out << "std::vector<Constant*> " << constName << "_elems;"; + nl(Out); + for (unsigned i = 0; i != CDS->getNumElements(); ++i) { + Constant *Elt = CDS->getElementAsConstant(i); + printConstant(Elt); + Out << constName << "_elems.push_back(" << getCppName(Elt) << ");"; + nl(Out); + } + Out << "Constant* " << constName; + + if (isa<ArrayType>(CDS->getType())) + Out << " = ConstantArray::get("; + else + Out << " = ConstantVector::get("; + Out << typeName << ", " << constName << "_elems);"; + } } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { if (CE->getOpcode() == Instruction::GetElementPtr) { Out << "std::vector<Constant*> " << constName << "_indices;"; @@ -1084,9 +1091,9 @@ void CppWriter::printInstruction(const Instruction *I, << SI->getNumCases() << ", " << bbname << ");"; nl(Out); unsigned NumCases = SI->getNumCases(); - for (unsigned i = 1; i < NumCases; ++i) { + for (unsigned i = 0; i < NumCases; ++i) { const ConstantInt* CaseVal = SI->getCaseValue(i); - const BasicBlock* BB = SI->getSuccessor(i); + const BasicBlock *BB = SI->getCaseSuccessor(i); Out << iName << "->addCase(" << getOpName(CaseVal) << ", " << getOpName(BB) << ");"; @@ -1135,11 +1142,6 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); break; } - case Instruction::Unwind: { - Out << "new UnwindInst(" - << bbname << ");"; - break; - } case Instruction::Unreachable: { Out << "new UnreachableInst(" << "mod->getContext(), " @@ -1354,7 +1356,7 @@ void CppWriter::printInstruction(const Instruction *I, case Instruction::PtrToInt: Out << "PtrToIntInst"; break; case Instruction::IntToPtr: Out << "IntToPtrInst"; break; case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(0 && "Unreachable"); break; + default: llvm_unreachable("Unreachable"); } Out << "(" << opNames[0] << ", " << getCppName(cst->getType()) << ", \""; @@ -2049,8 +2051,6 @@ bool CppWriter::runOnModule(Module &M) { fname = "makeLLVMType"; printType(fname,tgtname); break; - default: - error("Invalid generation option"); } return false; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index f8705ee..6c5da72 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -6,7 +6,7 @@ tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer) add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen @@ -19,8 +19,7 @@ add_llvm_target(HexagonCodeGen HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp HexagonISelLowering.cpp - HexagonMCAsmInfo.cpp - HexagonOptimizeSZExtends.cpp + HexagonPeephole.cpp HexagonRegisterInfo.cpp HexagonRemoveSZExtArgs.cpp HexagonSelectionDAGInfo.cpp diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index a5f2279..bbefcaf 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -35,7 +35,7 @@ namespace llvm { FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); FunctionPass *createHexagonHardwareLoops(); - FunctionPass *createHexagonOptimizeSZExtends(); + FunctionPass *createHexagonPeephole(); 
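The switch-printing loop in this file changed for the same reason as the CWriter one earlier: SwitchInst's case list no longer counts the default as case 0, so iteration now starts at 0 and targets come from getCaseSuccessor(i) rather than the raw successor list. A sketch of the adjusted traversal against this interim 3.1 API (later releases replaced the index-based methods with a case iterator); walkCases is illustrative only:

#include "llvm/Instructions.h"   // 3.1-era path for SwitchInst
using namespace llvm;

static void walkCases(SwitchInst &SI) {
  for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
    ConstantInt *CaseVal = SI.getCaseValue(i);   // constant for case i
    BasicBlock *Dest = SI.getCaseSuccessor(i);   // its destination block
    (void)CaseVal; (void)Dest;                   // emit/inspect here
  }
  BasicBlock *Default = SI.getDefaultDest();     // handled separately now
  (void)Default;
}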
FunctionPass *createHexagonFixupHwLoops(); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 72939e6..ab5093d 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -1,4 +1,4 @@ -//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===// +//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===// // // The LLVM Compiler Infrastructure // @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// // +// This is the top level entry point for the Hexagon target. // //===----------------------------------------------------------------------===// @@ -18,8 +19,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // Hexagon Subtarget features. -// - +//===----------------------------------------------------------------------===// // Hexagon Archtectures def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 8f8e804..688b8e3 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=// +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===// // // The LLVM Compiler Infrastructure // @@ -11,9 +11,6 @@ // of machine-dependent LLVM code to Hexagon assembly language. This printer is // the output mechanism used by `llc'. // -// Documentation at http://developer.apple.com/documentation/DeveloperTools/ -// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html -// //===----------------------------------------------------------------------===// @@ -125,6 +122,11 @@ namespace { O << -value; } + void printHexagonNOneImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) const { + O << -1; + } + void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO1 = MI->getOperand(OpNo); @@ -223,7 +225,6 @@ void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { case MachineOperand::MO_Immediate: dbgs() << "printOp() does not handle immediate values\n"; abort(); - return; case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); @@ -319,14 +320,14 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, if (Base.isReg()) printOperand(MI, OpNo, O); else - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); if (Offset.isImm()) { if (Offset.getImm()) O << " + #" << Offset.getImm(); } else - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); return false; } @@ -334,7 +335,7 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); } @@ -370,19 +371,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { O << "}"; } printInstruction(MI, O); - } else if (MI->getOpcode() == Hexagon::STriwt) { - // - // Handle truncated store on Hexagon. 
- // - O << "\tmemw("; - printHexagonMEMriOperand(MI, 0, O); - - O << ") = "; - unsigned SubRegNum = - TM.getRegisterInfo()->getSubReg(MI->getOperand(2) - .getReg(), Hexagon::subreg_loreg); - const char *SubRegName = getRegisterName(SubRegNum); - O << SubRegName << '\n'; } else if (MI->getOpcode() == Hexagon::MPYI_rin) { // Handle multipy with -ve constant on Hexagon: // "$dst =- mpyi($src1, #$src2)" diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 38000e7..9bca9e0 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -1,4 +1,4 @@ -//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===// +//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -6,27 +6,22 @@ // //===----------------------------------------------------------------------===// - #define DEBUG_TYPE "hexagon_cfg" -#include "llvm/CodeGen/Passes.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "HexagonTargetMachine.h" -#include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" -#include <iostream> - -#include "llvm/Support/CommandLine.h" using namespace llvm; @@ -56,8 +51,8 @@ private: char HexagonCFGOptimizer::ID = 0; static bool IsConditionalBranch(int Opc) { - return (Opc == Hexagon::JMP_Pred) || (Opc == Hexagon::JMP_PredNot) - || (Opc == Hexagon::JMP_PredPt) || (Opc == Hexagon::JMP_PredNotPt); + return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot) + || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt); } @@ -72,24 +67,24 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, const HexagonInstrInfo *QII = QTM.getInstrInfo(); int NewOpcode = 0; switch(MI->getOpcode()) { - case Hexagon::JMP_Pred: - NewOpcode = Hexagon::JMP_PredNot; + case Hexagon::JMP_c: + NewOpcode = Hexagon::JMP_cNot; break; - case Hexagon::JMP_PredNot: - NewOpcode = Hexagon::JMP_Pred; + case Hexagon::JMP_cNot: + NewOpcode = Hexagon::JMP_c; break; - case Hexagon::JMP_PredPt: - NewOpcode = Hexagon::JMP_PredNotPt; + case Hexagon::JMP_cdnPt: + NewOpcode = Hexagon::JMP_cdnNotPt; break; - case Hexagon::JMP_PredNotPt: - NewOpcode = Hexagon::JMP_PredPt; + case Hexagon::JMP_cdnNotPt: + NewOpcode = Hexagon::JMP_cdnPt; break; default: - assert(0 && "Cannot handle this case"); + llvm_unreachable("Cannot handle this case"); } MI->setDesc(QII->get(NewOpcode)); @@ -160,8 +155,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. 
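The deleted STriwt special case aside, the substantive Hexagon change in these files is the conditional-jump rename (the JMP_Pred family becomes the JMP_c family). InvertAndChangeJumpTarget, shown in the CFG-optimizer hunks above, still only swaps each opcode for its sense-inverted twin; condensed from those hunks (invertJumpOpcode is a restatement for reference, not a new function, and the Hexagon:: enumerators come from the generated instruction tables):

#include "llvm/Support/ErrorHandling.h"

static int invertJumpOpcode(int Opc) {
  switch (Opc) {
  case Hexagon::JMP_c:        return Hexagon::JMP_cNot;
  case Hexagon::JMP_cNot:     return Hexagon::JMP_c;
  case Hexagon::JMP_cdnPt:    return Hexagon::JMP_cdnNotPt;
  case Hexagon::JMP_cdnNotPt: return Hexagon::JMP_cdnPt;
  default: llvm_unreachable("Cannot handle this case");
  }
}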
MachineBasicBlock* CondBranchTarget = NULL; - if ((MI->getOpcode() == Hexagon::JMP_Pred) || - (MI->getOpcode() == Hexagon::JMP_PredNot)) { + if ((MI->getOpcode() == Hexagon::JMP_c) || + (MI->getOpcode() == Hexagon::JMP_cNot)) { CondBranchTarget = MI->getOperand(1).getMBB(); } diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index 2e51dbf..71787de 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -58,7 +58,7 @@ void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, void Hexagon_CCState::MarkAllocated(unsigned Reg) { UsedRegs[Reg/32] |= 1 << (Reg&31); - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + if (const uint16_t *RegAliases = TRI.getAliasSet(Reg)) for (; (Reg = *RegAliases); ++RegAliases) UsedRegs[Reg/32] |= 1 << (Reg&31); } diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index cb73ae0..2100474 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -1,11 +1,11 @@ -//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===// +//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===----------------------------------------------------------------------===//// +//===----------------------------------------------------------------------===// // The Hexagon processor has no instructions that load or store predicate // registers directly. So, when these registers must be spilled a general // purpose register must be found and the value copied to/from it from/to @@ -17,31 +17,25 @@ // //===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/Passes.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "HexagonTargetMachine.h" -#include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" -#include <map> -#include <iostream> - -#include "llvm/Support/CommandLine.h" - using namespace llvm; @@ -70,7 +64,6 @@ char HexagonExpandPredSpillCode::ID = 0; bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { const HexagonInstrInfo *TII = QTM.getInstrInfo(); - const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo(); // Loop over all of the basic blocks. 
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); @@ -84,7 +77,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { if (Opc == Hexagon::STriw_pred) { // STriw_pred [R30], ofst, SrcReg; unsigned FP = MI->getOperand(0).getReg(); - assert(FP == RegInfo->getFrameRegister() && + assert(FP == QTM.getRegisterInfo()->getFrameRegister() && "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(1).isImm() && "Not an offset"); int Offset = MI->getOperand(1).getImm(); @@ -129,7 +122,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { assert(Hexagon::PredRegsRegClass.contains(DstReg) && "Not a predicate register"); unsigned FP = MI->getOperand(1).getReg(); - assert(FP == RegInfo->getFrameRegister() && + assert(FP == QTM.getRegisterInfo()->getFrameRegister() && "Not a Frame Pointer, Nor a Spill Slot"); assert(MI->getOperand(2).isImm() && "Not an offset"); int Offset = MI->getOperand(2).getImm(); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 78e0b1c..49c6cdf 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1,4 +1,4 @@ -//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==// +//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,6 +7,7 @@ // // //===----------------------------------------------------------------------===// + #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" @@ -14,27 +15,25 @@ #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonFrameLowering.h" - +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include <iostream> +#include "llvm/Support/CommandLine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Function.h" using namespace llvm; static cl::opt<bool> DisableDeallocRet( @@ -236,7 +235,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters( // // Check if we can use a double-word store. // - const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + const uint16_t* SuperReg = TRI->getSuperRegisters(Reg); // Assume that there is exactly one superreg. 
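Alongside the frame-register assertion fix-ups, these hunks retype every TargetRegisterInfo list (getAliasSet and getSuperRegisters here, getCalleeSavedRegs earlier in the SPU files) from const unsigned* to const uint16_t*, halving the static tables; the arrays remain 0-terminated, so callers iterate exactly as before. A sketch mirroring Hexagon_CCState::MarkAllocated above (markAliases and the UsedRegs bitmap are illustrative):

#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void markAliases(const TargetRegisterInfo &TRI, unsigned Reg,
                        uint32_t *UsedRegs) {
  UsedRegs[Reg / 32] |= 1u << (Reg & 31);
  if (const uint16_t *Aliases = TRI.getAliasSet(Reg))
    for (; (Reg = *Aliases); ++Aliases)   // list ends at the 0 sentinel
      UsedRegs[Reg / 32] |= 1u << (Reg & 31);
}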
assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); @@ -244,7 +243,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters( const TargetRegisterClass* SuperRegClass = 0; if (ContiguousRegs && (i < CSI.size()-1)) { - const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); assert(SuperRegNext[0] && !SuperRegNext[1] && "Expected exactly one superreg"); SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); @@ -296,14 +295,14 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters( // // Check if we can use a double-word load. // - const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + const uint16_t* SuperReg = TRI->getSuperRegisters(Reg); const TargetRegisterClass* SuperRegClass = 0; // Assume that there is exactly one superreg. assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); bool CanUseDblLoad = false; if (ContiguousRegs && (i < CSI.size()-1)) { - const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); assert(SuperRegNext[0] && !SuperRegNext[1] && "Expected exactly one superreg"); SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index c1abc4a..04ea4ed 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -517,8 +517,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { // The loop ends with either: // - a conditional branch followed by an unconditional branch, or // - a conditional branch to the loop start. - if (LastI->getOpcode() == Hexagon::JMP_Pred || - LastI->getOpcode() == Hexagon::JMP_PredNot) { + if (LastI->getOpcode() == Hexagon::JMP_c || + LastI->getOpcode() == Hexagon::JMP_cNot) { // delete one and change/add an uncond. branch to out of the loop MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB(); LastI = LastMBB->erase(LastI); diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 4deab9f..9df965e 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -1,4 +1,4 @@ -//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==// +//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===// // // The LLVM Compiler Infrastructure // @@ -238,7 +238,7 @@ static unsigned doesIntrinsicContainPredicate(unsigned ID) case Intrinsic::hexagon_C2_or: return Hexagon::OR_pp; case Intrinsic::hexagon_C2_not: - return Hexagon::NOT_pp; + return Hexagon::NOT_p; case Intrinsic::hexagon_C2_any8: return Hexagon::ANY_pp; case Intrinsic::hexagon_C2_all8: @@ -295,7 +295,6 @@ static bool OffsetFitsS11(EVT MemType, int64_t Offset) { // CONST32. 
// SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) { - EVT LoadedVT = LD->getMemoryVT(); SDValue Chain = LD->getChain(); SDNode* Const32 = LD->getBasePtr().getNode(); unsigned Opcode = 0; @@ -572,8 +571,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) { ReplaceUses(Froms, Tos, 3); return Result_1; } - - return SelectCode(LD); } @@ -767,7 +764,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { SelectCode(N); } - SDValue Base = LD->getBasePtr(); SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, @@ -794,7 +790,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { return SelectCode(N); } - SDValue Base = LD->getBasePtr(); SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, @@ -949,7 +944,6 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { return SelectCode(N); } - SDValue Base = LD->getBasePtr(); SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, @@ -975,7 +969,6 @@ SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { return SelectCode(N); } - SDValue Base = LD->getBasePtr(); SDValue Chain = LD->getChain(); SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, @@ -1106,7 +1099,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { ReplaceUses(N, RsPd); return RsPd; } - assert(0 && "Unexpected value type"); + llvm_unreachable("Unexpected value type"); } } return SelectCode(N); @@ -1152,7 +1145,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32); Ops.push_back(SDVal); } else { - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); } } EVT ReturnValueVT = N->getValueType(0); @@ -1175,9 +1168,6 @@ SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SDNode* Result; int32_t Val = cast<ConstantSDNode>(N)->getSExtValue(); if (Val == -1) { - unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32)); - SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32); - // Create the IntReg = 1 node. SDNode* IntRegTFR = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, @@ -1188,7 +1178,7 @@ SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SDValue(IntRegTFR, 0)); // not(Pd) - SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1, + SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1, SDValue(Pd, 0)); // xor(not(Pd)) diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0ac3cf0..ed4b840 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -113,8 +113,6 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, return false; } llvm_unreachable(0); - - return true; } @@ -305,9 +303,6 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Analyze return values of ISD::RET CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); - SDValue StackPtr = DAG.getRegister(TM.getRegisterInfo()->getStackRegister(), - MVT::i32); - // If this is the first return lowered for this function, add the regs to the // liveout set for the function. 
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { @@ -320,8 +315,6 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; - SDValue Ret = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); @@ -378,7 +371,7 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -456,7 +449,7 @@ HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, switch (VA.getLocInfo()) { default: // Loc info must be one of Full, SExt, ZExt, or AExt. - assert(0 && "Unknown loc info!"); + llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: @@ -650,7 +643,7 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; } - bool isInc; + bool isInc = false; bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); // ShiftAmount = number of left-shifted bits in the Hexagon instruction. @@ -1305,6 +1298,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine // Needed for DYNAMIC_STACKALLOC expansion. unsigned StackRegister = TM.getRegisterInfo()->getStackRegister(); setStackPointerRegisterToSaveRestore(StackRegister); + setSchedulingPreference(Sched::VLIW); } @@ -1351,12 +1345,12 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: assert(0 && "Should not custom lower this!"); + default: llvm_unreachable("Should not custom lower this!"); // Frame & Return address. Currently unimplemented. 
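As in the SPU lowering earlier, Hexagon's LowerCall gains a doesNotRet flag, threaded through from LowerCallTo, so a backend can emit a different call sequence for callees known never to return. Out-of-tree targets must update their override to match or they will silently stop overriding the hook. The updated signature, excerpted from the headers in this diff with the operand lists unchanged:

virtual SDValue
  LowerCall(SDValue Chain, SDValue Callee,
            CallingConv::ID CallConv, bool isVarArg,
            bool doesNotRet, bool &isTailCall,   // new flag precedes isTailCall
            const SmallVectorImpl<ISD::OutputArg> &Outs,
            const SmallVectorImpl<SDValue> &OutVals,
            const SmallVectorImpl<ISD::InputArg> &Ins,
            DebugLoc dl, SelectionDAG &DAG,
            SmallVectorImpl<SDValue> &InVals) const;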
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: - assert(0 && "TLS not implemented for Hexagon."); + llvm_unreachable("TLS not implemented for Hexagon."); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); @@ -1388,10 +1382,8 @@ const { FuncInfo->addAllocaAdjustInst(MI); return BB; } - default: - assert(false && "Unexpected instr type to insert"); + default: llvm_unreachable("Unexpected instr type to insert"); } // switch - return NULL; } //===----------------------------------------------------------------------===// @@ -1407,7 +1399,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const case 'r': // R0-R31 switch (VT.getSimpleVT().SimpleTy) { default: - assert(0 && "getRegForInlineAsmConstraint Unhandled data type"); + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); case MVT::i32: case MVT::i16: case MVT::i8: @@ -1416,7 +1408,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass); } default: - assert(0 && "Unknown asm register class"); + llvm_unreachable("Unknown asm register class"); } } diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index b327615..5396486 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -1,4 +1,4 @@ -//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==// +//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -96,7 +96,7 @@ namespace llvm { SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td index 1e3fcb8..18589a2 100644 --- a/lib/Target/Hexagon/HexagonImmediates.td +++ b/lib/Target/Hexagon/HexagonImmediates.td @@ -1,4 +1,4 @@ -//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=// +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -52,12 +52,12 @@ def s10Imm : Operand<i32> { let PrintMethod = "printHexagonImmOperand"; } -def s8Imm : Operand<i32> { +def s9Imm : Operand<i32> { // For now, we use a generic print function for all operands. let PrintMethod = "printHexagonImmOperand"; } -def s9Imm : Operand<i32> { +def s8Imm : Operand<i32> { // For now, we use a generic print function for all operands. let PrintMethod = "printHexagonImmOperand"; } @@ -197,6 +197,11 @@ def u2Imm : Operand<i32> { let PrintMethod = "printHexagonImmOperand"; } +def u1Imm : Operand<i32> { + // For now, we use a generic print function for all operands. + let PrintMethod = "printHexagonImmOperand"; +} + def n8Imm : Operand<i32> { // For now, we use a generic print function for all operands. let PrintMethod = "printHexagonImmOperand"; @@ -207,6 +212,11 @@ def m6Imm : Operand<i32> { let PrintMethod = "printHexagonImmOperand"; } +def nOneImm : Operand<i32> { + // For now, we use a generic print function for all operands. 
+ let PrintMethod = "printHexagonNOneImmOperand"; +} + // // Immediate predicates // @@ -489,3 +499,10 @@ def n8ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return (-255 <= v && v <= 0); }]>; + +def nOneImmPred : PatLeaf<(i32 imm), [{ + // nOneImmPred predicate - True if the immediate is -1. + int64_t v = (int64_t)N->getSExtValue(); + return (-1 == v); +}]>; + diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 7e92776..c9f16fb 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -7,28 +7,42 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Hexagon Intruction Flags + +// +// *** Must match HexagonBaseInfo.h *** +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// Intruction Class Declaration + +//===----------------------------------------------------------------------===// + class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, - string cstr, - InstrItinClass itin> : Instruction { + string cstr, InstrItinClass itin> : Instruction { field bits<32> Inst; let Namespace = "Hexagon"; -/* Commented out for Hexagon - bits<2> op; - let Inst{31-30} = op; */ // Top two bits are the 'op' field - dag OutOperandList = outs; dag InOperandList = ins; let AsmString = asmstr; let Pattern = pattern; let Constraints = cstr; let Itinerary = itin; + + // *** The code below must match HexagonBaseInfo.h *** + + // Predicated instructions. + bits<1> isPredicated = 0; + let TSFlags{1} = isPredicated; + + // *** The code above must match HexagonBaseInfo.h *** } -//----------------------------------------------------------------------------// +//===----------------------------------------------------------------------===// // Intruction Classes Definitions + -//----------------------------------------------------------------------------// +//===----------------------------------------------------------------------===// // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
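The new TSFlags line below is the tablegen half of a contract with HexagonBaseInfo.h (not shown in this diff): bit 1 of each instruction's TSFlags now records isPredicated, and the C++ side reads it back with a matching shift and mask. A sketch of that read side; the HexagonII enum values are assumed from the "let TSFlags{1}" assignment, not copied from the header, and isPredicatedInst is illustrative:

#include "llvm/MC/MCInstrDesc.h"

namespace HexagonII {
  enum { PredicatedPos = 1, PredicatedMask = 0x1 };  // assumed layout
}

static bool isPredicatedInst(const llvm::MCInstrDesc &D) {
  return (D.TSFlags >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
}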
@@ -174,9 +188,9 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>; -//----------------------------------------------------------------------------// +//===----------------------------------------------------------------------===// // Instruction Classes Definitions - -//----------------------------------------------------------------------------// +//===----------------------------------------------------------------------===// // diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 69a50d7..07872d4 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1,4 +1,4 @@ -//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=// +//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===// // // The LLVM Compiler Infrastructure // @@ -15,19 +15,18 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "Hexagon.h" -#include "llvm/Support/MathExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/MathExtras.h" #define GET_INSTRINFO_CTOR #include "HexagonGenInstrInfo.inc" - -#include <iostream> - +#include "HexagonGenDFAPacketizer.inc" using namespace llvm; @@ -125,16 +124,16 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, DebugLoc DL) const{ int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_Pred; + int BccOpc = Hexagon::JMP_c; assert(TBB && "InsertBranch must not be told to insert a fallthrough"); int regPos = 0; // Check if ReverseBranchCondition has asked to reverse this branch // If we want to reverse the branch an odd number of times, we want - // JMP_PredNot. + // JMP_cNot. if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { - BccOpc = Hexagon::JMP_PredNot; + BccOpc = Hexagon::JMP_cNot; regPos = 1; } @@ -222,13 +221,13 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, TBB = LastInst->getOperand(0).getMBB(); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_Pred) { + if (LastInst->getOpcode() == Hexagon::JMP_c) { // Block ends with fall-through true condbranch. TBB = LastInst->getOperand(1).getMBB(); Cond.push_back(LastInst->getOperand(0)); return false; } - if (LastInst->getOpcode() == Hexagon::JMP_PredNot) { + if (LastInst->getOpcode() == Hexagon::JMP_cNot) { // Block ends with fall-through false condbranch. TBB = LastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); @@ -249,7 +248,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it. if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) || - (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) && + (SecondLastInst->getOpcode() == Hexagon::JMP_c)) && LastInst->getOpcode() == Hexagon::JMP) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); @@ -257,8 +256,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; } - // If the block ends with Hexagon::JMP_PredNot and Hexagon:JMP, handle it.
- if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) && + // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it. + if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) && LastInst->getOpcode() == Hexagon::JMP) { TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(MachineOperand::CreateImm(0)); @@ -285,8 +284,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { int BOpc = Hexagon::JMP; - int BccOpc = Hexagon::JMP_Pred; - int BccOpcNot = Hexagon::JMP_PredNot; + int BccOpc = Hexagon::JMP_c; + int BccOpcNot = Hexagon::JMP_cNot; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) return 0; @@ -347,9 +346,9 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); return; - } - - assert (0 && "Unimplemented"); + } + + llvm_unreachable("Unimplemented"); } @@ -384,7 +383,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); } else { - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); } } @@ -396,8 +395,7 @@ void HexagonInstrInfo::storeRegToAddr( const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const { - assert(0 && "Unimplemented"); - return; + llvm_unreachable("Unimplemented"); } @@ -428,7 +426,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else { - assert(0 && "Can't store this register to stack slot"); + llvm_unreachable("Can't store this register to stack slot"); } } @@ -437,7 +435,7 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const { - assert(0 && "Unimplemented"); + llvm_unreachable("Unimplemented"); } @@ -461,7 +459,7 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { } else if (VT == MVT::i64) { TRC = Hexagon::DoubleRegsRegisterClass; } else { - assert(0 && "Cannot handle this register class"); + llvm_unreachable("Cannot handle this register class"); } unsigned NewReg = RegInfo.createVirtualRegister(TRC); @@ -469,6 +467,7 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { } + bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { bool isPred = MI->getDesc().isPredicable(); @@ -553,15 +552,469 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::JMPR: return false; - return true; - - default: - return true; } return true; } +unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + switch(Opc) { + case Hexagon::TFR_cPt: + return Hexagon::TFR_cNotPt; + case Hexagon::TFR_cNotPt: + return Hexagon::TFR_cPt; + + case Hexagon::TFRI_cPt: + return Hexagon::TFRI_cNotPt; + case Hexagon::TFRI_cNotPt: + return Hexagon::TFRI_cPt; + + case Hexagon::JMP_c: + return Hexagon::JMP_cNot; + case Hexagon::JMP_cNot: + return Hexagon::JMP_c; + + case Hexagon::ADD_ri_cPt: + return Hexagon::ADD_ri_cNotPt; + case Hexagon::ADD_ri_cNotPt: + return Hexagon::ADD_ri_cPt; + + case Hexagon::ADD_rr_cPt: + return Hexagon::ADD_rr_cNotPt; + case Hexagon::ADD_rr_cNotPt: + return Hexagon::ADD_rr_cPt; + + case Hexagon::XOR_rr_cPt: + return 
Hexagon::XOR_rr_cNotPt; + case Hexagon::XOR_rr_cNotPt: + return Hexagon::XOR_rr_cPt; + + case Hexagon::AND_rr_cPt: + return Hexagon::AND_rr_cNotPt; + case Hexagon::AND_rr_cNotPt: + return Hexagon::AND_rr_cPt; + + case Hexagon::OR_rr_cPt: + return Hexagon::OR_rr_cNotPt; + case Hexagon::OR_rr_cNotPt: + return Hexagon::OR_rr_cPt; + + case Hexagon::SUB_rr_cPt: + return Hexagon::SUB_rr_cNotPt; + case Hexagon::SUB_rr_cNotPt: + return Hexagon::SUB_rr_cPt; + + case Hexagon::COMBINE_rr_cPt: + return Hexagon::COMBINE_rr_cNotPt; + case Hexagon::COMBINE_rr_cNotPt: + return Hexagon::COMBINE_rr_cPt; + + case Hexagon::ASLH_cPt_V4: + return Hexagon::ASLH_cNotPt_V4; + case Hexagon::ASLH_cNotPt_V4: + return Hexagon::ASLH_cPt_V4; + + case Hexagon::ASRH_cPt_V4: + return Hexagon::ASRH_cNotPt_V4; + case Hexagon::ASRH_cNotPt_V4: + return Hexagon::ASRH_cPt_V4; + + case Hexagon::SXTB_cPt_V4: + return Hexagon::SXTB_cNotPt_V4; + case Hexagon::SXTB_cNotPt_V4: + return Hexagon::SXTB_cPt_V4; + + case Hexagon::SXTH_cPt_V4: + return Hexagon::SXTH_cNotPt_V4; + case Hexagon::SXTH_cNotPt_V4: + return Hexagon::SXTH_cPt_V4; + + case Hexagon::ZXTB_cPt_V4: + return Hexagon::ZXTB_cNotPt_V4; + case Hexagon::ZXTB_cNotPt_V4: + return Hexagon::ZXTB_cPt_V4; + + case Hexagon::ZXTH_cPt_V4: + return Hexagon::ZXTH_cNotPt_V4; + case Hexagon::ZXTH_cNotPt_V4: + return Hexagon::ZXTH_cPt_V4; + + + case Hexagon::JMPR_cPt: + return Hexagon::JMPR_cNotPt; + case Hexagon::JMPR_cNotPt: + return Hexagon::JMPR_cPt; + + // V4 indexed+scaled load. + case Hexagon::LDrid_indexed_cPt_V4: + return Hexagon::LDrid_indexed_cNotPt_V4; + case Hexagon::LDrid_indexed_cNotPt_V4: + return Hexagon::LDrid_indexed_cPt_V4; + + case Hexagon::LDrid_indexed_shl_cPt_V4: + return Hexagon::LDrid_indexed_shl_cNotPt_V4; + case Hexagon::LDrid_indexed_shl_cNotPt_V4: + return Hexagon::LDrid_indexed_shl_cPt_V4; + + case Hexagon::LDrib_indexed_cPt_V4: + return Hexagon::LDrib_indexed_cNotPt_V4; + case Hexagon::LDrib_indexed_cNotPt_V4: + return Hexagon::LDrib_indexed_cPt_V4; + + case Hexagon::LDriub_indexed_cPt_V4: + return Hexagon::LDriub_indexed_cNotPt_V4; + case Hexagon::LDriub_indexed_cNotPt_V4: + return Hexagon::LDriub_indexed_cPt_V4; + + case Hexagon::LDrib_indexed_shl_cPt_V4: + return Hexagon::LDrib_indexed_shl_cNotPt_V4; + case Hexagon::LDrib_indexed_shl_cNotPt_V4: + return Hexagon::LDrib_indexed_shl_cPt_V4; + + case Hexagon::LDriub_indexed_shl_cPt_V4: + return Hexagon::LDriub_indexed_shl_cNotPt_V4; + case Hexagon::LDriub_indexed_shl_cNotPt_V4: + return Hexagon::LDriub_indexed_shl_cPt_V4; + + case Hexagon::LDrih_indexed_cPt_V4: + return Hexagon::LDrih_indexed_cNotPt_V4; + case Hexagon::LDrih_indexed_cNotPt_V4: + return Hexagon::LDrih_indexed_cPt_V4; + + case Hexagon::LDriuh_indexed_cPt_V4: + return Hexagon::LDriuh_indexed_cNotPt_V4; + case Hexagon::LDriuh_indexed_cNotPt_V4: + return Hexagon::LDriuh_indexed_cPt_V4; + + case Hexagon::LDrih_indexed_shl_cPt_V4: + return Hexagon::LDrih_indexed_shl_cNotPt_V4; + case Hexagon::LDrih_indexed_shl_cNotPt_V4: + return Hexagon::LDrih_indexed_shl_cPt_V4; + + case Hexagon::LDriuh_indexed_shl_cPt_V4: + return Hexagon::LDriuh_indexed_shl_cNotPt_V4; + case Hexagon::LDriuh_indexed_shl_cNotPt_V4: + return Hexagon::LDriuh_indexed_shl_cPt_V4; + + case Hexagon::LDriw_indexed_cPt_V4: + return Hexagon::LDriw_indexed_cNotPt_V4; + case Hexagon::LDriw_indexed_cNotPt_V4: + return Hexagon::LDriw_indexed_cPt_V4; + + case Hexagon::LDriw_indexed_shl_cPt_V4: + return Hexagon::LDriw_indexed_shl_cNotPt_V4; + case Hexagon::LDriw_indexed_shl_cNotPt_V4: + return 
Hexagon::LDriw_indexed_shl_cPt_V4; + + // Byte. + case Hexagon::POST_STbri_cPt: + return Hexagon::POST_STbri_cNotPt; + case Hexagon::POST_STbri_cNotPt: + return Hexagon::POST_STbri_cPt; + + case Hexagon::STrib_cPt: + return Hexagon::STrib_cNotPt; + case Hexagon::STrib_cNotPt: + return Hexagon::STrib_cPt; + + case Hexagon::STrib_indexed_cPt: + return Hexagon::STrib_indexed_cNotPt; + case Hexagon::STrib_indexed_cNotPt: + return Hexagon::STrib_indexed_cPt; + + case Hexagon::STrib_imm_cPt_V4: + return Hexagon::STrib_imm_cNotPt_V4; + case Hexagon::STrib_imm_cNotPt_V4: + return Hexagon::STrib_imm_cPt_V4; + + case Hexagon::STrib_indexed_shl_cPt_V4: + return Hexagon::STrib_indexed_shl_cNotPt_V4; + case Hexagon::STrib_indexed_shl_cNotPt_V4: + return Hexagon::STrib_indexed_shl_cPt_V4; + + // Halfword. + case Hexagon::POST_SThri_cPt: + return Hexagon::POST_SThri_cNotPt; + case Hexagon::POST_SThri_cNotPt: + return Hexagon::POST_SThri_cPt; + + case Hexagon::STrih_cPt: + return Hexagon::STrih_cNotPt; + case Hexagon::STrih_cNotPt: + return Hexagon::STrih_cPt; + + case Hexagon::STrih_indexed_cPt: + return Hexagon::STrih_indexed_cNotPt; + case Hexagon::STrih_indexed_cNotPt: + return Hexagon::STrih_indexed_cPt; + + case Hexagon::STrih_imm_cPt_V4: + return Hexagon::STrih_imm_cNotPt_V4; + case Hexagon::STrih_imm_cNotPt_V4: + return Hexagon::STrih_imm_cPt_V4; + + case Hexagon::STrih_indexed_shl_cPt_V4: + return Hexagon::STrih_indexed_shl_cNotPt_V4; + case Hexagon::STrih_indexed_shl_cNotPt_V4: + return Hexagon::STrih_indexed_shl_cPt_V4; + + // Word. + case Hexagon::POST_STwri_cPt: + return Hexagon::POST_STwri_cNotPt; + case Hexagon::POST_STwri_cNotPt: + return Hexagon::POST_STwri_cPt; + + case Hexagon::STriw_cPt: + return Hexagon::STriw_cNotPt; + case Hexagon::STriw_cNotPt: + return Hexagon::STriw_cPt; + + case Hexagon::STriw_indexed_cPt: + return Hexagon::STriw_indexed_cNotPt; + case Hexagon::STriw_indexed_cNotPt: + return Hexagon::STriw_indexed_cPt; + + case Hexagon::STriw_indexed_shl_cPt_V4: + return Hexagon::STriw_indexed_shl_cNotPt_V4; + case Hexagon::STriw_indexed_shl_cNotPt_V4: + return Hexagon::STriw_indexed_shl_cPt_V4; + + case Hexagon::STriw_imm_cPt_V4: + return Hexagon::STriw_imm_cNotPt_V4; + case Hexagon::STriw_imm_cNotPt_V4: + return Hexagon::STriw_imm_cPt_V4; + + // Double word. + case Hexagon::POST_STdri_cPt: + return Hexagon::POST_STdri_cNotPt; + case Hexagon::POST_STdri_cNotPt: + return Hexagon::POST_STdri_cPt; + + case Hexagon::STrid_cPt: + return Hexagon::STrid_cNotPt; + case Hexagon::STrid_cNotPt: + return Hexagon::STrid_cPt; + + case Hexagon::STrid_indexed_cPt: + return Hexagon::STrid_indexed_cNotPt; + case Hexagon::STrid_indexed_cNotPt: + return Hexagon::STrid_indexed_cPt; + + case Hexagon::STrid_indexed_shl_cPt_V4: + return Hexagon::STrid_indexed_shl_cNotPt_V4; + case Hexagon::STrid_indexed_shl_cNotPt_V4: + return Hexagon::STrid_indexed_shl_cPt_V4; + + // Load. 
+ case Hexagon::LDrid_cPt: + return Hexagon::LDrid_cNotPt; + case Hexagon::LDrid_cNotPt: + return Hexagon::LDrid_cPt; + + case Hexagon::LDriw_cPt: + return Hexagon::LDriw_cNotPt; + case Hexagon::LDriw_cNotPt: + return Hexagon::LDriw_cPt; + + case Hexagon::LDrih_cPt: + return Hexagon::LDrih_cNotPt; + case Hexagon::LDrih_cNotPt: + return Hexagon::LDrih_cPt; + + case Hexagon::LDriuh_cPt: + return Hexagon::LDriuh_cNotPt; + case Hexagon::LDriuh_cNotPt: + return Hexagon::LDriuh_cPt; + + case Hexagon::LDrib_cPt: + return Hexagon::LDrib_cNotPt; + case Hexagon::LDrib_cNotPt: + return Hexagon::LDrib_cPt; + + case Hexagon::LDriub_cPt: + return Hexagon::LDriub_cNotPt; + case Hexagon::LDriub_cNotPt: + return Hexagon::LDriub_cPt; + + // Load Indexed. + case Hexagon::LDrid_indexed_cPt: + return Hexagon::LDrid_indexed_cNotPt; + case Hexagon::LDrid_indexed_cNotPt: + return Hexagon::LDrid_indexed_cPt; + + case Hexagon::LDriw_indexed_cPt: + return Hexagon::LDriw_indexed_cNotPt; + case Hexagon::LDriw_indexed_cNotPt: + return Hexagon::LDriw_indexed_cPt; + + case Hexagon::LDrih_indexed_cPt: + return Hexagon::LDrih_indexed_cNotPt; + case Hexagon::LDrih_indexed_cNotPt: + return Hexagon::LDrih_indexed_cPt; + + case Hexagon::LDriuh_indexed_cPt: + return Hexagon::LDriuh_indexed_cNotPt; + case Hexagon::LDriuh_indexed_cNotPt: + return Hexagon::LDriuh_indexed_cPt; + + case Hexagon::LDrib_indexed_cPt: + return Hexagon::LDrib_indexed_cNotPt; + case Hexagon::LDrib_indexed_cNotPt: + return Hexagon::LDrib_indexed_cPt; + + case Hexagon::LDriub_indexed_cPt: + return Hexagon::LDriub_indexed_cNotPt; + case Hexagon::LDriub_indexed_cNotPt: + return Hexagon::LDriub_indexed_cPt; + + // Post Inc Load. + case Hexagon::POST_LDrid_cPt: + return Hexagon::POST_LDrid_cNotPt; + case Hexagon::POST_LDriw_cNotPt: + return Hexagon::POST_LDriw_cPt; + + case Hexagon::POST_LDrih_cPt: + return Hexagon::POST_LDrih_cNotPt; + case Hexagon::POST_LDrih_cNotPt: + return Hexagon::POST_LDrih_cPt; + + case Hexagon::POST_LDriuh_cPt: + return Hexagon::POST_LDriuh_cNotPt; + case Hexagon::POST_LDriuh_cNotPt: + return Hexagon::POST_LDriuh_cPt; + + case Hexagon::POST_LDrib_cPt: + return Hexagon::POST_LDrib_cNotPt; + case Hexagon::POST_LDrib_cNotPt: + return Hexagon::POST_LDrib_cPt; + + case Hexagon::POST_LDriub_cPt: + return Hexagon::POST_LDriub_cNotPt; + case Hexagon::POST_LDriub_cNotPt: + return Hexagon::POST_LDriub_cPt; + + // Dealloc_return. + case Hexagon::DEALLOC_RET_cPt_V4: + return Hexagon::DEALLOC_RET_cNotPt_V4; + case Hexagon::DEALLOC_RET_cNotPt_V4: + return Hexagon::DEALLOC_RET_cPt_V4; + + // New Value Jump. + // JMPEQ_ri - with -1. + case Hexagon::JMP_EQriPtneg_nv_V4: + return Hexagon::JMP_EQriNotPtneg_nv_V4; + case Hexagon::JMP_EQriNotPtneg_nv_V4: + return Hexagon::JMP_EQriPtneg_nv_V4; + + case Hexagon::JMP_EQriPntneg_nv_V4: + return Hexagon::JMP_EQriNotPntneg_nv_V4; + case Hexagon::JMP_EQriNotPntneg_nv_V4: + return Hexagon::JMP_EQriPntneg_nv_V4; + + // JMPEQ_ri. + case Hexagon::JMP_EQriPt_nv_V4: + return Hexagon::JMP_EQriNotPt_nv_V4; + case Hexagon::JMP_EQriNotPt_nv_V4: + return Hexagon::JMP_EQriPt_nv_V4; + + case Hexagon::JMP_EQriPnt_nv_V4: + return Hexagon::JMP_EQriNotPnt_nv_V4; + case Hexagon::JMP_EQriNotPnt_nv_V4: + return Hexagon::JMP_EQriPnt_nv_V4; + + // JMPEQ_rr. 
+ case Hexagon::JMP_EQrrPt_nv_V4: + return Hexagon::JMP_EQrrNotPt_nv_V4; + case Hexagon::JMP_EQrrNotPt_nv_V4: + return Hexagon::JMP_EQrrPt_nv_V4; + + case Hexagon::JMP_EQrrPnt_nv_V4: + return Hexagon::JMP_EQrrNotPnt_nv_V4; + case Hexagon::JMP_EQrrNotPnt_nv_V4: + return Hexagon::JMP_EQrrPnt_nv_V4; + + // JMPGT_ri - with -1. + case Hexagon::JMP_GTriPtneg_nv_V4: + return Hexagon::JMP_GTriNotPtneg_nv_V4; + case Hexagon::JMP_GTriNotPtneg_nv_V4: + return Hexagon::JMP_GTriPtneg_nv_V4; + + case Hexagon::JMP_GTriPntneg_nv_V4: + return Hexagon::JMP_GTriNotPntneg_nv_V4; + case Hexagon::JMP_GTriNotPntneg_nv_V4: + return Hexagon::JMP_GTriPntneg_nv_V4; + + // JMPGT_ri. + case Hexagon::JMP_GTriPt_nv_V4: + return Hexagon::JMP_GTriNotPt_nv_V4; + case Hexagon::JMP_GTriNotPt_nv_V4: + return Hexagon::JMP_GTriPt_nv_V4; + + case Hexagon::JMP_GTriPnt_nv_V4: + return Hexagon::JMP_GTriNotPnt_nv_V4; + case Hexagon::JMP_GTriNotPnt_nv_V4: + return Hexagon::JMP_GTriPnt_nv_V4; + + // JMPGT_rr. + case Hexagon::JMP_GTrrPt_nv_V4: + return Hexagon::JMP_GTrrNotPt_nv_V4; + case Hexagon::JMP_GTrrNotPt_nv_V4: + return Hexagon::JMP_GTrrPt_nv_V4; + + case Hexagon::JMP_GTrrPnt_nv_V4: + return Hexagon::JMP_GTrrNotPnt_nv_V4; + case Hexagon::JMP_GTrrNotPnt_nv_V4: + return Hexagon::JMP_GTrrPnt_nv_V4; + + // JMPGT_rrdn. + case Hexagon::JMP_GTrrdnPt_nv_V4: + return Hexagon::JMP_GTrrdnNotPt_nv_V4; + case Hexagon::JMP_GTrrdnNotPt_nv_V4: + return Hexagon::JMP_GTrrdnPt_nv_V4; + + case Hexagon::JMP_GTrrdnPnt_nv_V4: + return Hexagon::JMP_GTrrdnNotPnt_nv_V4; + case Hexagon::JMP_GTrrdnNotPnt_nv_V4: + return Hexagon::JMP_GTrrdnPnt_nv_V4; + + // JMPGTU_ri. + case Hexagon::JMP_GTUriPt_nv_V4: + return Hexagon::JMP_GTUriNotPt_nv_V4; + case Hexagon::JMP_GTUriNotPt_nv_V4: + return Hexagon::JMP_GTUriPt_nv_V4; + + case Hexagon::JMP_GTUriPnt_nv_V4: + return Hexagon::JMP_GTUriNotPnt_nv_V4; + case Hexagon::JMP_GTUriNotPnt_nv_V4: + return Hexagon::JMP_GTUriPnt_nv_V4; + + // JMPGTU_rr. + case Hexagon::JMP_GTUrrPt_nv_V4: + return Hexagon::JMP_GTUrrNotPt_nv_V4; + case Hexagon::JMP_GTUrrNotPt_nv_V4: + return Hexagon::JMP_GTUrrPt_nv_V4; + + case Hexagon::JMP_GTUrrPnt_nv_V4: + return Hexagon::JMP_GTUrrNotPnt_nv_V4; + case Hexagon::JMP_GTUrrNotPnt_nv_V4: + return Hexagon::JMP_GTUrrPnt_nv_V4; + + // JMPGTU_rrdn. + case Hexagon::JMP_GTUrrdnPt_nv_V4: + return Hexagon::JMP_GTUrrdnNotPt_nv_V4; + case Hexagon::JMP_GTUrrdnNotPt_nv_V4: + return Hexagon::JMP_GTUrrdnPt_nv_V4; + + case Hexagon::JMP_GTUrrdnPnt_nv_V4: + return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; + case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: + return Hexagon::JMP_GTUrrdnPnt_nv_V4; + + default: + llvm_unreachable("Unexpected predicated instruction"); + } +} + int HexagonInstrInfo:: getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { @@ -573,8 +1026,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { return !invertPredicate ? Hexagon::TFRI_cPt : Hexagon::TFRI_cNotPt; case Hexagon::JMP: - return !invertPredicate ? Hexagon::JMP_Pred : - Hexagon::JMP_PredNot; + return !invertPredicate ? Hexagon::JMP_c : + Hexagon::JMP_cNot; case Hexagon::ADD_ri: return !invertPredicate ? Hexagon::ADD_ri_cPt : Hexagon::ADD_ri_cNotPt; @@ -793,9 +1246,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::DEALLOC_RET_V4: return !invertPredicate ? 
Hexagon::DEALLOC_RET_cPt_V4 : Hexagon::DEALLOC_RET_cNotPt_V4; - default: - assert(false && "Unexpected predicable instruction"); } + llvm_unreachable("Unexpected predicable instruction"); } @@ -827,7 +1279,7 @@ PredicateInstruction(MachineInstr *MI, } else if (MO.isImm()) { MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); } else { - assert(false && "Unexpected operand type"); + llvm_unreachable("Unexpected operand type"); } } @@ -866,213 +1318,9 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - case Hexagon::TFR_cPt: - case Hexagon::TFR_cNotPt: - case Hexagon::TFRI_cPt: - case Hexagon::TFRI_cNotPt: - case Hexagon::TFR_cdnPt: - case Hexagon::TFR_cdnNotPt: - case Hexagon::TFRI_cdnPt: - case Hexagon::TFRI_cdnNotPt: - return true; - - case Hexagon::JMP_Pred: - case Hexagon::JMP_PredNot: - case Hexagon::BRCOND: - case Hexagon::JMP_PredPt: - case Hexagon::JMP_PredNotPt: - case Hexagon::JMP_PredPnt: - case Hexagon::JMP_PredNotPnt: - return true; + const uint64_t F = MI->getDesc().TSFlags; - case Hexagon::LDrid_indexed_cPt_V4 : - case Hexagon::LDrid_indexed_cdnPt_V4 : - case Hexagon::LDrid_indexed_cNotPt_V4 : - case Hexagon::LDrid_indexed_cdnNotPt_V4 : - case Hexagon::LDrid_indexed_shl_cPt_V4 : - case Hexagon::LDrid_indexed_shl_cdnPt_V4 : - case Hexagon::LDrid_indexed_shl_cNotPt_V4 : - case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_cPt_V4 : - case Hexagon::LDrib_indexed_cdnPt_V4 : - case Hexagon::LDrib_indexed_cNotPt_V4 : - case Hexagon::LDrib_indexed_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_shl_cPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnPt_V4 : - case Hexagon::LDrib_indexed_shl_cNotPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_cPt_V4 : - case Hexagon::LDriub_indexed_cdnPt_V4 : - case Hexagon::LDriub_indexed_cNotPt_V4 : - case Hexagon::LDriub_indexed_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_shl_cPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnPt_V4 : - case Hexagon::LDriub_indexed_shl_cNotPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_cPt_V4 : - case Hexagon::LDrih_indexed_cdnPt_V4 : - case Hexagon::LDrih_indexed_cNotPt_V4 : - case Hexagon::LDrih_indexed_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_shl_cPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnPt_V4 : - case Hexagon::LDrih_indexed_shl_cNotPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_cPt_V4 : - case Hexagon::LDriuh_indexed_cdnPt_V4 : - case Hexagon::LDriuh_indexed_cNotPt_V4 : - case Hexagon::LDriuh_indexed_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_shl_cPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : - case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_cPt_V4 : - case Hexagon::LDriw_indexed_cdnPt_V4 : - case Hexagon::LDriw_indexed_cNotPt_V4 : - case Hexagon::LDriw_indexed_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_shl_cPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnPt_V4 : - case Hexagon::LDriw_indexed_shl_cNotPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : - return true; - - case Hexagon::LDrid_cPt : - case Hexagon::LDrid_cNotPt : - case Hexagon::LDrid_indexed_cPt : - case Hexagon::LDrid_indexed_cNotPt : - case Hexagon::POST_LDrid_cPt : - case Hexagon::POST_LDrid_cNotPt : - case Hexagon::LDriw_cPt : - case Hexagon::LDriw_cNotPt : - case Hexagon::LDriw_indexed_cPt : - case 
Hexagon::LDriw_indexed_cNotPt : - case Hexagon::POST_LDriw_cPt : - case Hexagon::POST_LDriw_cNotPt : - case Hexagon::LDrih_cPt : - case Hexagon::LDrih_cNotPt : - case Hexagon::LDrih_indexed_cPt : - case Hexagon::LDrih_indexed_cNotPt : - case Hexagon::POST_LDrih_cPt : - case Hexagon::POST_LDrih_cNotPt : - case Hexagon::LDrib_cPt : - case Hexagon::LDrib_cNotPt : - case Hexagon::LDrib_indexed_cPt : - case Hexagon::LDrib_indexed_cNotPt : - case Hexagon::POST_LDrib_cPt : - case Hexagon::POST_LDrib_cNotPt : - case Hexagon::LDriuh_cPt : - case Hexagon::LDriuh_cNotPt : - case Hexagon::LDriuh_indexed_cPt : - case Hexagon::LDriuh_indexed_cNotPt : - case Hexagon::POST_LDriuh_cPt : - case Hexagon::POST_LDriuh_cNotPt : - case Hexagon::LDriub_cPt : - case Hexagon::LDriub_cNotPt : - case Hexagon::LDriub_indexed_cPt : - case Hexagon::LDriub_indexed_cNotPt : - case Hexagon::POST_LDriub_cPt : - case Hexagon::POST_LDriub_cNotPt : - return true; - - case Hexagon::LDrid_cdnPt : - case Hexagon::LDrid_cdnNotPt : - case Hexagon::LDrid_indexed_cdnPt : - case Hexagon::LDrid_indexed_cdnNotPt : - case Hexagon::POST_LDrid_cdnPt_V4 : - case Hexagon::POST_LDrid_cdnNotPt_V4 : - case Hexagon::LDriw_cdnPt : - case Hexagon::LDriw_cdnNotPt : - case Hexagon::LDriw_indexed_cdnPt : - case Hexagon::LDriw_indexed_cdnNotPt : - case Hexagon::POST_LDriw_cdnPt_V4 : - case Hexagon::POST_LDriw_cdnNotPt_V4 : - case Hexagon::LDrih_cdnPt : - case Hexagon::LDrih_cdnNotPt : - case Hexagon::LDrih_indexed_cdnPt : - case Hexagon::LDrih_indexed_cdnNotPt : - case Hexagon::POST_LDrih_cdnPt_V4 : - case Hexagon::POST_LDrih_cdnNotPt_V4 : - case Hexagon::LDrib_cdnPt : - case Hexagon::LDrib_cdnNotPt : - case Hexagon::LDrib_indexed_cdnPt : - case Hexagon::LDrib_indexed_cdnNotPt : - case Hexagon::POST_LDrib_cdnPt_V4 : - case Hexagon::POST_LDrib_cdnNotPt_V4 : - case Hexagon::LDriuh_cdnPt : - case Hexagon::LDriuh_cdnNotPt : - case Hexagon::LDriuh_indexed_cdnPt : - case Hexagon::LDriuh_indexed_cdnNotPt : - case Hexagon::POST_LDriuh_cdnPt_V4 : - case Hexagon::POST_LDriuh_cdnNotPt_V4 : - case Hexagon::LDriub_cdnPt : - case Hexagon::LDriub_cdnNotPt : - case Hexagon::LDriub_indexed_cdnPt : - case Hexagon::LDriub_indexed_cdnNotPt : - case Hexagon::POST_LDriub_cdnPt_V4 : - case Hexagon::POST_LDriub_cdnNotPt_V4 : - return true; - - case Hexagon::ADD_ri_cPt: - case Hexagon::ADD_ri_cNotPt: - case Hexagon::ADD_ri_cdnPt: - case Hexagon::ADD_ri_cdnNotPt: - case Hexagon::ADD_rr_cPt: - case Hexagon::ADD_rr_cNotPt: - case Hexagon::ADD_rr_cdnPt: - case Hexagon::ADD_rr_cdnNotPt: - case Hexagon::XOR_rr_cPt: - case Hexagon::XOR_rr_cNotPt: - case Hexagon::XOR_rr_cdnPt: - case Hexagon::XOR_rr_cdnNotPt: - case Hexagon::AND_rr_cPt: - case Hexagon::AND_rr_cNotPt: - case Hexagon::AND_rr_cdnPt: - case Hexagon::AND_rr_cdnNotPt: - case Hexagon::OR_rr_cPt: - case Hexagon::OR_rr_cNotPt: - case Hexagon::OR_rr_cdnPt: - case Hexagon::OR_rr_cdnNotPt: - case Hexagon::SUB_rr_cPt: - case Hexagon::SUB_rr_cNotPt: - case Hexagon::SUB_rr_cdnPt: - case Hexagon::SUB_rr_cdnNotPt: - case Hexagon::COMBINE_rr_cPt: - case Hexagon::COMBINE_rr_cNotPt: - case Hexagon::COMBINE_rr_cdnPt: - case Hexagon::COMBINE_rr_cdnNotPt: - return true; - - case Hexagon::ASLH_cPt_V4: - case Hexagon::ASLH_cNotPt_V4: - case Hexagon::ASRH_cPt_V4: - case Hexagon::ASRH_cNotPt_V4: - case Hexagon::SXTB_cPt_V4: - case Hexagon::SXTB_cNotPt_V4: - case Hexagon::SXTH_cPt_V4: - case Hexagon::SXTH_cNotPt_V4: - case Hexagon::ZXTB_cPt_V4: - case Hexagon::ZXTB_cNotPt_V4: - case Hexagon::ZXTH_cPt_V4: - case Hexagon::ZXTH_cNotPt_V4: - return 
true; - - case Hexagon::ASLH_cdnPt_V4: - case Hexagon::ASLH_cdnNotPt_V4: - case Hexagon::ASRH_cdnPt_V4: - case Hexagon::ASRH_cdnNotPt_V4: - case Hexagon::SXTB_cdnPt_V4: - case Hexagon::SXTB_cdnNotPt_V4: - case Hexagon::SXTH_cdnPt_V4: - case Hexagon::SXTH_cdnNotPt_V4: - case Hexagon::ZXTB_cdnPt_V4: - case Hexagon::ZXTB_cdnNotPt_V4: - case Hexagon::ZXTH_cdnPt_V4: - case Hexagon::ZXTH_cdnNotPt_V4: - return true; - - default: - return false; - } + return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } @@ -1149,7 +1397,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::LDriw: case Hexagon::STriw: - case Hexagon::STriwt: assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -1243,8 +1490,8 @@ isValidOffset(const int Opcode, const int Offset) const { return true; } - assert(0 && "No offset range is defined for this opcode. Please define it in \ - the above switch statement!"); + llvm_unreachable("No offset range is defined for this opcode. " + "Please define it in the above switch statement!"); } @@ -1273,10 +1520,7 @@ isValidAutoIncImm(const EVT VT, const int Offset) const { return (Offset >= Hexagon_MEMB_AUTOINC_MIN && Offset <= Hexagon_MEMB_AUTOINC_MAX); } - - assert(0 && "Not an auto-inc opc!"); - - return false; + llvm_unreachable("Not an auto-inc opc!"); } @@ -1380,7 +1624,6 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { default: return false; } - return false; } @@ -1455,5 +1698,30 @@ isConditionalLoad (const MachineInstr* MI) const { default: return false; } +} + +DFAPacketizer *HexagonInstrInfo:: +CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II); +} + +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Terminators and labels can't be scheduled around. 
+ if (MI->getDesc().isTerminator() || MI->isLabel() || MI->isInlineAsm()) + return true; + return false; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index d549c46..eb088c3 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,4 +1,4 @@ -//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=// +//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,7 @@ #ifndef HexagonINSTRUCTIONINFO_H #define HexagonINSTRUCTIONINFO_H +#include "MCTargetDesc/HexagonBaseInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "HexagonRegisterInfo.h" @@ -135,6 +136,13 @@ public: isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, const BranchProbability &Probability) const; + virtual DFAPacketizer* + CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; bool isValidOffset(const int Opcode, const int Offset) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isMemOp(const MachineInstr *MI) const; @@ -155,6 +163,7 @@ public: bool isConditionalALU32 (const MachineInstr* MI) const; bool isConditionalLoad (const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; private: int getMatchingCondBranchOpcode(int Opc, bool sense) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index cc508b7..f3c6622 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -319,49 +319,49 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), //===----------------------------------------------------------------------===// // Conditional add. 
-let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), "if ($src1) $dst = add($src2, #$src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), "if (!$src1) $dst = add($src2, #$src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), "if ($src1.new) $dst = add($src2, #$src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), "if (!$src1.new) $dst = add($src2, #$src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = add($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = add($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = add($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = add($src2, $src3)", @@ -370,25 +370,25 @@ def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), // Conditional combine. -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = combine($src2, $src3)", @@ -396,61 +396,73 @@ def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), // Conditional logical operations. 
+let isPredicated = 1 in def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = xor($src2, $src3)", []>; +let isPredicated = 1 in def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = xor($src2, $src3)", []>; +let isPredicated = 1 in def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = xor($src2, $src3)", []>; +let isPredicated = 1 in def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = xor($src2, $src3)", []>; +let isPredicated = 1 in def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = and($src2, $src3)", []>; +let isPredicated = 1 in def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = and($src2, $src3)", []>; +let isPredicated = 1 in def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = and($src2, $src3)", []>; +let isPredicated = 1 in def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = and($src2, $src3)", []>; +let isPredicated = 1 in def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = or($src2, $src3)", []>; +let isPredicated = 1 in def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = or($src2, $src3)", []>; +let isPredicated = 1 in def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = or($src2, $src3)", []>; +let isPredicated = 1 in def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = or($src2, $src3)", @@ -459,21 +471,25 @@ def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), // Conditional subtract. +let isPredicated = 1 in def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = sub($src2, $src3)", []>; +let isPredicated = 1 in def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = sub($src2, $src3)", []>; +let isPredicated = 1 in def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = sub($src2, $src3)", []>; +let isPredicated = 1 in def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = sub($src2, $src3)", @@ -482,47 +498,47 @@ def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), // Conditional transfer. 
-let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = $src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = $src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), "if ($src1) $dst = #$src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), "if (!$src1) $dst = #$src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = $src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = $src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), "if ($src1.new) $dst = #$src2", []>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), "if (!$src1.new) $dst = #$src2", @@ -679,10 +695,6 @@ def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, "$dst = and($src1, !$src2)", []>; -def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), - "$dst = not($src1)", - [(set PredRegs:$dst, (not PredRegs:$src1))]>; - def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), "$dst = any8($src1)", []>; @@ -712,7 +724,7 @@ def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), "$dst = mask($src1)", []>; -def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), +def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), "$dst = not($src1)", [(set PredRegs:$dst, (not PredRegs:$src1))]>; @@ -743,22 +755,25 @@ let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in { } // if (p0) jump -let isBranch = 1, isTerminator=1, Defs = [PC] in { - def JMP_Pred : JInst< (outs), +let isBranch = 1, isTerminator=1, Defs = [PC], + isPredicated = 1 in { + def JMP_c : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if ($src) jump $offset", [(brcond PredRegs:$src, bb:$offset)]>; } // if (!p0) jump -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - def JMP_PredNot : JInst< (outs), +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { + def JMP_cNot : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if (!$src) jump $offset", []>; } -let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in { +let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst), "if ($pred) jump $dst", []>; @@ -766,32 +781,36 @@ let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in { // Jump to address conditioned on new predicate. 
// if (p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - def JMP_PredPt : JInst< (outs), +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { + def JMP_cdnPt : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if ($src.new) jump:t $offset", []>; } // if (!p0) jump:t -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - def JMP_PredNotPt : JInst< (outs), +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { + def JMP_cdnNotPt : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if (!$src.new) jump:t $offset", []>; } // Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - def JMP_PredPnt : JInst< (outs), +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { + def JMP_cdnPnt : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if ($src.new) jump:nt $offset", []>; } // Not taken. -let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { - def JMP_PredNotPnt : JInst< (outs), +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC], + isPredicated = 1 in { + def JMP_cdnNotPnt : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if (!$src.new) jump:nt $offset", []>; @@ -1779,7 +1798,8 @@ def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst), "$src3 = $dst">; // if (!Pv) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, s4_3Imm:$offset), @@ -1859,14 +1879,14 @@ def STrib_indexed_cNotPt : STInst<(outs), // if ([!]Pv) memb(Rx++#s4:0)=Rt // if (Pv) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if ($src1) memb($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if (!$src1) memb($src3++#$offset) = $src2", @@ -1944,14 +1964,14 @@ def STrih_indexed_cNotPt : STInst<(outs), // if ([!]Pv) memh(Rx++#s4:1)=Rt // if (Pv) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if ($src1) memh($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if (!$src1) memh($src3++#$offset) = $src2", @@ -1979,11 +1999,6 @@ def STriw_indexed : STInst<(outs), "memw($src1+#$src2) = $src3", [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>; -def STriwt : STInst<(outs), - (ins MEMri:$addr, DoubleRegs:$src1), - "memw($addr) = $src1", - [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>; - let mayStore = 1, neverHasSideEffects = 1 in def STriw_GP : STInst<(outs), (ins 
globaladdress:$global, u16Imm:$offset, IntRegs:$src), @@ -2030,14 +2045,14 @@ def STriw_indexed_cNotPt : STInst<(outs), // if ([!]Pv) memw(Rx++#s4:2)=Rt // if (Pv) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), "if ($src1) memw($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), "if (!$src1) memw($src3++#$offset) = $src2", @@ -2244,6 +2259,20 @@ def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src3))]>; let AddedComplexity = 100 in +def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (select PredRegs:$src1, IntRegs:$src2, s12ImmPred:$src3))]>; + +let AddedComplexity = 100 in +def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (select PredRegs:$src1, s12ImmPred:$src2, IntRegs:$src3))]>; + +let AddedComplexity = 100 in def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), "Error; should not emit", @@ -2436,7 +2465,7 @@ def : Pat <(and IntRegs:$src1, 255), // Add(p1, false) should never be produced, // if it does, it has to be mapped to NOOP. def : Pat <(add PredRegs:$src1, -1), - (NOT_pp PredRegs:$src1)>; + (NOT_p PredRegs:$src1)>; // Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) => // p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1). @@ -2451,7 +2480,7 @@ def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3), // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. def : Pat <(brcond (not PredRegs:$src1), bb:$offset), - (JMP_PredNot PredRegs:$src1, bb:$offset)>; + (JMP_cNot PredRegs:$src1, bb:$offset)>; // Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2). def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)), @@ -2650,39 +2679,39 @@ def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)), (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>; // We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a JMP_PredNot. +// Map brcond with an unsupported setcc to a JMP_cNot.
def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + (JMP_cNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset), - (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>; + (JMP_cNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset), - (JMP_PredNot PredRegs:$src1, bb:$offset)>; + (JMP_cNot PredRegs:$src1, bb:$offset)>; def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset), - (JMP_Pred PredRegs:$src1, bb:$offset)>; + (JMP_c PredRegs:$src1, bb:$offset)>; def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset), - (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>; + (JMP_cNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + (JMP_c (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), bb:$offset), - (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1), + (JMP_cNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1), bb:$offset)>; def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; + (JMP_cNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), bb:$offset), - (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2), + (JMP_cNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2), bb:$offset)>; -// Map from a 64-bit select to an emulated 64-bit mux. +// Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), (COMBINE_rr @@ -2697,7 +2726,7 @@ def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), // From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2), - (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>; + (AND_pp (NOT_p PredRegs:$src1), PredRegs:$src3))>; // Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. def : Pat<(i1 (load ADDRriS11_2:$addr)), @@ -2711,7 +2740,7 @@ def : Pat<(i32 (trunc DoubleRegs:$src)), def : Pat<(i1 (trunc DoubleRegs:$src)), (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>; -// Map memw(Rs) = Rdd -> memw(Rs) = Rt. +// Map memb(Rs) = Rdd -> memb(Rs) = Rt. def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr), (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg)))>; @@ -2721,6 +2750,11 @@ def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr), (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg)))>; +// Map memw(Rs) = Rdd -> memw(Rs) = Rt. +def : Pat<(truncstorei32 DoubleRegs:$src, ADDRriS11_0:$addr), + (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, + subreg_loreg)))>; + // Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), (STrib ADDRriS11_2:$addr, (TFRI 1))>; @@ -2749,26 +2783,26 @@ def : Pat<(i64 (anyext IntRegs:$src1)), // Map cmple -> cmpgt. 
// rs <= rt -> !(rs > rt). def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)), - (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>; + (i1 (NOT_p (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>; // rs <= rt -> !(rs > rt). def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>; + (i1 (NOT_p (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>; // Rss <= Rtt -> !(Rss > Rtt). def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; + (i1 (NOT_p (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)), - (i1 (NOT_Ps(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>; + (i1 (NOT_p(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>; // Map cmpne(Rs) -> !cmpeqe(Rs). // rs != rt -> !(rs == rt). def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_Ps(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>; + (i1 (NOT_p(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)), @@ -2777,12 +2811,12 @@ def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)), // Map cmpne(Rss) -> !cmpew(Rss). // rs != rt -> !(rs == rt). def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_Ps(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>; + (i1 (NOT_p(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>; // Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). // rs >= rt -> !(rt > rs). def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_Ps(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>; + (i1 (NOT_p(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>; def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)), (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>; @@ -2790,12 +2824,12 @@ def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)), // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_Ps(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>; + (i1 (NOT_p(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>; // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). // rs < rt -> !(rs >= rt). def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), - (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>; + (i1 (NOT_p (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>; // Map cmplt(Rs, Rt) -> cmplt(Rs, Rt). // rs < rt -> rs < rt. Let assembler map it. @@ -2820,22 +2854,22 @@ def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)), // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>; + (i1 (NOT_p (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>; + (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>; // Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs). // Map from (Rs <= Rt) -> !(Rs > Rt). def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>; + (i1 (NOT_p (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>; // Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). // Map from (Rs <= Rt) -> !(Rs > Rt). 
def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; + (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; // Sign extends. // i1 -> i32 diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 24218d0..9e60cf2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -77,48 +77,56 @@ // Shift halfword. +let isPredicated = 1 in def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = aslh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = aslh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = aslh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = aslh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = asrh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = asrh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = asrh($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = asrh($src2)", @@ -127,24 +135,28 @@ def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), // Sign extend. +let isPredicated = 1 in def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = sxtb($src2)", @@ -152,24 +164,28 @@ def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), Requires<[HasV4T]>; +let isPredicated = 1 in def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = sxth($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = sxth($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = sxth($src2)", []>, Requires<[HasV4T]>; +let isPredicated = 1 in def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = sxth($src2)", @@ -178,56 +194,56 @@ def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), // Zero extend.
-let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = zxth($src2)", @@ -276,7 +292,7 @@ def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst), //// Load doubleword conditionally. 
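The recurring change in this hunk (and in the conditional loads that follow) is mechanical: every conditional-execution definition gains `isPredicated = 1`. The flag ends up in the instruction's MCInstrDesc, so passes can ask the target whether an instruction is predicated instead of enumerating opcodes; the new HexagonPeephole.cpp later in this patch depends on exactly this query. A minimal consumer-side sketch (the helper name is hypothetical; the isPredicated call is the in-tree API used later in the patch):

#include "HexagonInstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"

// With `isPredicated = 1` set on the defs above, predication is visible
// through the generic TargetInstrInfo interface.
static bool isConditionallyExecuted(const llvm::HexagonInstrInfo *QII,
                                    llvm::MachineInstr *MI) {
  return QII->isPredicated(MI); // true for ASLH_cPt_V4, ZXTB_cdnNotPt_V4, ...
}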
// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2) // if (Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memd($src2+$src3<<#0)", @@ -284,7 +300,7 @@ def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memd($src2+$src3<<#0)", @@ -292,7 +308,7 @@ def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memd($src2+$src3<<#0)", @@ -300,7 +316,7 @@ def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memd($src2+$src3<<#0)", @@ -308,7 +324,7 @@ def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -317,7 +333,7 @@ def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -326,7 +342,7 @@ def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -335,7 +351,7 @@ def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -405,7 +421,7 @@ def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), //// Load byte conditionally. 
// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) // if (Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memb($src2+$src3<<#0)", @@ -413,7 +429,7 @@ def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memb($src2+$src3<<#0)", @@ -421,7 +437,7 @@ def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memb($src2+$src3<<#0)", @@ -429,7 +445,7 @@ def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memb($src2+$src3<<#0)", @@ -437,7 +453,7 @@ def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -446,7 +462,7 @@ def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -455,7 +471,7 @@ def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -464,7 +480,7 @@ def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -475,7 +491,7 @@ def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), //// Load unsigned byte conditionally. 
// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) // if (Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memub($src2+$src3<<#0)", @@ -483,7 +499,7 @@ def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memub($src2+$src3<<#0)", @@ -491,7 +507,7 @@ def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memub($src2+$src3<<#0)", @@ -499,7 +515,7 @@ def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memub($src2+$src3<<#0)", @@ -507,7 +523,7 @@ def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -516,7 +532,7 @@ def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -525,7 +541,7 @@ def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -534,7 +550,7 @@ def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -606,7 +622,7 @@ def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), //// Load halfword conditionally. 
// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) // if (Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memh($src2+$src3<<#0)", @@ -614,7 +630,7 @@ def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memh($src2+$src3<<#0)", @@ -622,7 +638,7 @@ def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memh($src2+$src3<<#0)", @@ -630,7 +646,7 @@ def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memh($src2+$src3<<#0)", @@ -638,7 +654,7 @@ def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -647,7 +663,7 @@ def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -656,7 +672,7 @@ def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -665,7 +681,7 @@ def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -676,7 +692,7 @@ def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), //// Load unsigned halfword conditionally. 
// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) // if (Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memuh($src2+$src3<<#0)", @@ -684,7 +700,7 @@ def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memuh($src2+$src3<<#0)", @@ -692,7 +708,7 @@ def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memuh($src2+$src3<<#0)", @@ -700,7 +716,7 @@ def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", @@ -708,7 +724,7 @@ def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -717,7 +733,7 @@ def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -726,7 +742,7 @@ def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -735,7 +751,7 @@ def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -770,7 +786,7 @@ def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), //// Load word conditionally. 
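Operationally, all of these conditional indexed loads behave alike, differing only in access width and sign/zero extension; when the predicate is false, the destination register simply keeps its old value. A behavioral sketch of the word form defined below (plain C++, hypothetical helper, assuming a flat byte-addressed memory; this models the semantics, it is not compiler code):

#include <cstdint>
#include <cstring>

// Model of "if (Pv) Rd = memw(Rs + Rt << #u2)"; u2 is 0 for the
// non-shifted forms and the u2Imm offset for the _shl forms.
uint32_t memw_indexed_cPt(bool pv, const uint8_t *mem, uint32_t rs,
                          uint32_t rt, unsigned u2, uint32_t rd_old) {
  if (!pv)
    return rd_old;                                  // predicated off: Rd unchanged
  uint32_t v;
  std::memcpy(&v, mem + rs + (rt << u2), sizeof v); // memw access
  return v;
}

The _cdn ("dot-new") variants differ only in when the predicate is read (it is produced earlier in the same packet), not in the memory semantics.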
// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2) // if (Pv) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memw($src2+$src3<<#0)", @@ -778,7 +794,7 @@ def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memw($src2+$src3<<#0)", @@ -786,7 +802,7 @@ def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memw($src2+$src3<<#0)", @@ -794,7 +810,7 @@ def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15 in +let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memw($src2+$src3<<#0)", @@ -802,7 +818,7 @@ def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -811,7 +827,7 @@ def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -820,7 +836,7 @@ def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -829,7 +845,7 @@ def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45 in +let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), @@ -843,7 +859,7 @@ def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), // Post-inc Load, Predicated, Dot new -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if ($src1.new) $dst1 = memd($src2++#$src3)", @@ -851,7 +867,7 @@ def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, 
neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if (!$src1.new) $dst1 = memd($src2++#$src3)", @@ -859,7 +875,7 @@ def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1.new) $dst1 = memb($src2++#$src3)", @@ -867,7 +883,7 @@ def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1.new) $dst1 = memb($src2++#$src3)", @@ -875,7 +891,7 @@ def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1.new) $dst1 = memh($src2++#$src3)", @@ -883,7 +899,7 @@ def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1.new) $dst1 = memh($src2++#$src3)", @@ -891,7 +907,7 @@ def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1.new) $dst1 = memub($src2++#$src3)", @@ -899,7 +915,7 @@ def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1.new) $dst1 = memub($src2++#$src3)", @@ -907,7 +923,7 @@ def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1.new) $dst1 = memuh($src2++#$src3)", @@ -915,7 +931,7 @@ def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, 
hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1.new) $dst1 = memuh($src2++#$src3)", @@ -923,7 +939,7 @@ def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if ($src1.new) $dst1 = memw($src2++#$src3)", @@ -931,7 +947,7 @@ def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if (!$src1.new) $dst1 = memw($src2++#$src3)", @@ -2215,6 +2231,181 @@ def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), // NV/ST - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// NV/J + +//===----------------------------------------------------------------------===// + +multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> { + def _ie_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, $src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; + + def _nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, $src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; +} + +multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, string TakenStr> { + def _ie_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1, $src2.new)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; + + def _nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1, $src2.new)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; +} + +multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> { + def _ie_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; + + def _nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; +} + +multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> { + def _ie_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), + 
!strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; + + def _nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; +} + +multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, string TakenStr> { + def _ie_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; + + def _nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), + !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, + !strconcat("($src1.new, #$src2)) jump:", + !strconcat(TakenStr, " $offset"))))), + []>, + Requires<[HasV4T]>; +} + +// Multiclass for regular dot new of Ist operand register. +multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> { + defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">; + defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">; +} + +// Multiclass for dot new of 2nd operand register. +multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> { + defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">; + defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">; +} + +// Multiclass for 2nd operand immediate, including -1. +multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> { + defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; + defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; + defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">; + defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">; +} + +// Multiclass for 2nd operand immediate, excluding -1. +multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> { + defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">; + defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">; +} + +// Multiclass for tstbit, where 2nd operand is always #0. +multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> { + defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">; + defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">; +} + +// Multiclass for GT. +multiclass NVJ_type_rr_ri<string OpcStr> { + defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; + defm rr : NVJ_type_br_pred_reg<"", OpcStr>; + defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; + defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; + defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; + defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +} + +// Multiclass for EQ. +multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> { + defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; + defm rr : NVJ_type_br_pred_reg<"", OpcStr>; + defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>; + defm ri : NVJ_type_br_pred_imm<"", OpcStr>; +} + +// Multiclass for GTU. +multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> { + defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>; + defm rr : NVJ_type_br_pred_reg<"", OpcStr>; + defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>; + defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>; + defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>; + defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>; +} + +// Multiclass for tstbit. 
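Spelled out, the assembly string each of these NVJ multiclasses assembles through nested !strconcat calls has the shape "if ([!]op($src1.new, $src2)) jump:[n]t $offset". A hypothetical C++ helper mirroring the concatenation (same parameter names as the TableGen; the helper itself is not part of the patch):

#include <string>

std::string nvjAsmString(const std::string &NotStr, const std::string &OpcStr,
                         const std::string &TakenStr) {
  return "if (" + NotStr + OpcStr + "($src1.new, $src2)) jump:" +
         TakenStr + " $offset";
}
// nvjAsmString("!", "cmp.eq", "nt") ==
//   "if (!cmp.eq($src1.new, $src2)) jump:nt $offset"

Note that within each multiclass the _ie_nv_V4 and _nv_V4 twins build the identical string; only the def names differ. The tstbit multiclass that follows feeds "tstbit" through the same machinery, with the second operand always #0.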
+multiclass NVJ_type_r0<string OpcStr> { + defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>; + defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>; + } + +// Base Multiclass for New Value Jump. +multiclass NVJ_type { + defm GT : NVJ_type_rr_ri<"cmp.gt">; + defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">; + defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">; + defm TSTBIT : NVJ_type_r0<"tstbit">; +} + +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + defm JMP_ : NVJ_type; +} + +//===----------------------------------------------------------------------===// +// NV/J - +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // XTYPE/ALU + diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 1328eba..b15e293 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -1,4 +1,4 @@ -//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===// +//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp deleted file mode 100644 index 1229aca..0000000 --- a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===-- HexagonOptimizeSZExtends.cpp - Identify and remove sign and -------===// -//===-- zero extends. -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Constants.h" -#include "llvm/PassSupport.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Debug.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include <algorithm> -#include "Hexagon.h" -#include "HexagonTargetMachine.h" - -using namespace llvm; - -namespace { - struct HexagonOptimizeSZExtends : public MachineFunctionPass { - - public: - static char ID; - HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { - return "Hexagon remove redundant zero and size extends"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineFunctionAnalysis>(); - AU.addPreserved<MachineFunctionAnalysis>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - }; -} - -char HexagonOptimizeSZExtends::ID = 0; - -// This is a brain dead pass to get rid of redundant sign extends for the -// following case: -// -// Transform the following pattern -// %vreg170<def> = SXTW %vreg166 -// ... 
-// %vreg176<def> = COPY %vreg170:subreg_loreg -// -// Into -// %vreg176<def> = COPY vreg166 - -bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) { - DenseMap<unsigned, unsigned> SExtMap; - - // Loop over all of the basic blocks - for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); - MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; - SExtMap.clear(); - - // Traverse the basic block. - for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); - ++MII) { - MachineInstr *MI = MII; - // Look for sign extends: - // %vreg170<def> = SXTW %vreg166 - if (MI->getOpcode() == Hexagon::SXTW) { - assert (MI->getNumOperands() == 2); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); - // Just handle virtual registers. - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { - // Map the following: - // %vreg170<def> = SXTW %vreg166 - // SExtMap[170] = vreg166 - SExtMap[DstReg] = SrcReg; - } - } - // Look for copy: - // %vreg176<def> = COPY %vreg170:subreg_loreg - if (MI->isCopy()) { - assert (MI->getNumOperands() == 2); - MachineOperand &Dst = MI->getOperand(0); - MachineOperand &Src = MI->getOperand(1); - - // Make sure we are copying the lower 32 bits. - if (Src.getSubReg() != Hexagon::subreg_loreg) - continue; - - unsigned DstReg = Dst.getReg(); - unsigned SrcReg = Src.getReg(); - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg)) { - // Try to find in the map. - if (unsigned SextSrc = SExtMap.lookup(SrcReg)) { - // Change the 1st operand. - MI->RemoveOperand(1); - MI->addOperand(MachineOperand::CreateReg(SextSrc, false)); - } - } - } - } - } - return true; -} - -FunctionPass *llvm::createHexagonOptimizeSZExtends() { - return new HexagonOptimizeSZExtends(); -} diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp new file mode 100644 index 0000000..06c732f --- /dev/null +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -0,0 +1,289 @@ +//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// This peephole pass optimizes in the following cases. +// 1. Optimizes redundant sign extends for the following case: +// Transform the following pattern +// %vreg170<def> = SXTW %vreg166 +// ... +// %vreg176<def> = COPY %vreg170:subreg_loreg +// +// Into +// %vreg176<def> = COPY %vreg166 +// +// 2. Optimizes redundant negation of predicates. +// %vreg15<def> = CMPGTrr %vreg6, %vreg2 +// ... +// %vreg16<def> = NOT_p %vreg15<kill> +// ... +// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead> +// +// Into +// %vreg15<def> = CMPGTrr %vreg6, %vreg2; +// ... +// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>; +// +// Note: The peephole pass makes the instructions like +// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill> +// redundant and relies on some form of dead instruction removal, like +// DCE or DIE, to actually eliminate them.
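Both rewrites rest on simple value identities, checked here in a standalone sketch (plain C++, independent of the pass):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // 1. SXTW/COPY: the low 32 bits of a 32->64 sign extension are
  //    bit-identical to the source, so a COPY of subreg_loreg can read
  //    the original virtual register directly.
  uint32_t vreg166 = 0x80000001u;
  int64_t vreg170 = (int64_t)(int32_t)vreg166;    // SXTW
  assert((uint32_t)(uint64_t)vreg170 == vreg166); // COPY ...:subreg_loreg

  // 2. P=NOT(P): using !p with the original condition sense equals using
  //    p with the sense flipped (JMP_c -> JMP_cNot); likewise a mux driven
  //    by !p equals the mux on p with its two arms swapped, which is why
  //    the pass later exchanges the MUX/TFR_condset source operands.
  for (bool p : {false, true}) {
    int t = 1, f = 3;
    assert(((!p) ? t : f) == (p ? f : t));
  }
  return 0;
}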
+ + +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-peephole" +#include "llvm/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Peephole Optimization")); + +static cl::opt<int> +DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden, + cl::desc("Maximum number of P=NOT(P) to be optimized")); + +static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of PNotP")); + +static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Optimization of Sign/Zero Extends")); + +namespace { + struct HexagonPeephole : public MachineFunctionPass { + const HexagonInstrInfo *QII; + const HexagonRegisterInfo *QRI; + const MachineRegisterInfo *MRI; + + public: + static char ID; + HexagonPeephole() : MachineFunctionPass(ID) { } + + bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Hexagon optimize redundant zero and size extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src); + }; +} + +char HexagonPeephole::ID = 0; + +bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { + + QII = static_cast<const HexagonInstrInfo *>(MF.getTarget(). + getInstrInfo()); + QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget(). + getRegisterInfo()); + MRI = &MF.getRegInfo(); + + DenseMap<unsigned, unsigned> PeepholeMap; + + if (DisableHexagonPeephole) return false; + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + PeepholeMap.clear(); + + // Traverse the basic block. + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + // Look for sign extends: + // %vreg170<def> = SXTW %vreg166 + if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = SXTW %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for P=NOT(P). 
+ if (!DisablePNotP && + (MI->getOpcode() == Hexagon::NOT_p)) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = NOT_xx %vreg166 + // PeepholeMap[170] = vreg166 + PeepholeMap[DstReg] = SrcReg; + } + } + + // Look for copy: + // %vreg176<def> = COPY %vreg170:subreg_loreg + if (!DisableOptSZExt && MI->isCopy()) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + + // Make sure we are copying the lower 32 bits. + if (Src.getSubReg() != Hexagon::subreg_loreg) + continue; + + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) { + // Change the 1st operand. + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false)); + } + } + } + + // Look for Predicated instructions. + if (!DisablePNotP) { + bool Done = false; + if (QII->isPredicated(MI)) { + MachineOperand &Op0 = MI->getOperand(0); + unsigned Reg0 = Op0.getReg(); + const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0); + if (RC0->getID() == Hexagon::PredRegsRegClassID) { + // Handle instructions that have a predicate register in op0 + // (most cases of predicable instructions). + if (TargetRegisterInfo::isVirtualRegister(Reg0)) { + // Try to find in the map. + if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) { + // Change the 1st operand and flip the opcode. + MI->getOperand(0).setReg(PeepholeSrc); + int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode()); + MI->setDesc(QII->get(NewOp)); + Done = true; + } + } + } + } + + if (!Done) { + // Handle special instructions. + unsigned Op = MI->getOpcode(); + unsigned NewOp = 0; + unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices. + + switch (Op) { + case Hexagon::TFR_condset_rr: + case Hexagon::TFR_condset_ii: + case Hexagon::MUX_ii: + case Hexagon::MUX_rr: + NewOp = Op; + break; + case Hexagon::TFR_condset_ri: + NewOp = Hexagon::TFR_condset_ir; + break; + case Hexagon::TFR_condset_ir: + NewOp = Hexagon::TFR_condset_ri; + break; + case Hexagon::MUX_ri: + NewOp = Hexagon::MUX_ir; + break; + case Hexagon::MUX_ir: + NewOp = Hexagon::MUX_ri; + break; + } + if (NewOp) { + unsigned PSrc = MI->getOperand(PR).getReg(); + if (unsigned POrig = PeepholeMap.lookup(PSrc)) { + MI->getOperand(PR).setReg(POrig); + MI->setDesc(QII->get(NewOp)); + // Swap operands S1 and S2. 
+ MachineOperand Op1 = MI->getOperand(S1); + MachineOperand Op2 = MI->getOperand(S2); + ChangeOpInto(MI->getOperand(S1), Op2); + ChangeOpInto(MI->getOperand(S2), Op1); + } + } // if (NewOp) + } // if (!Done) + + } // if (!DisablePNotP) + + } // Instruction + } // Basic Block + return true; +} + +void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) { + assert (&Dst != &Src && "Cannot duplicate into itself"); + switch (Dst.getType()) { + case MachineOperand::MO_Register: + if (Src.isReg()) { + Dst.setReg(Src.getReg()); + } else if (Src.isImm()) { + Dst.ChangeToImmediate(Src.getImm()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + case MachineOperand::MO_Immediate: + if (Src.isImm()) { + Dst.setImm(Src.getImm()); + } else if (Src.isReg()) { + Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(), + Src.isKill(), Src.isDead(), Src.isUndef(), + Src.isDebug()); + } else { + llvm_unreachable("Unexpected src operand type"); + } + break; + + default: + llvm_unreachable("Unexpected dst operand type"); + break; + } +} + +FunctionPass *llvm::createHexagonPeephole() { + return new HexagonPeephole(); +} diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 521e0c1..c481270 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -1,4 +1,4 @@ -//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==// +//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===// // // The LLVM Compiler Infrastructure // @@ -17,24 +17,23 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include <iostream> +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Function.h" using namespace llvm; @@ -45,31 +44,29 @@ HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st, TII(tii) { } -const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction +const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const unsigned CalleeSavedRegsV2[] = { - Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + static const uint16_t CalleeSavedRegsV2[] = { + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; - static const unsigned CalleeSavedRegsV3[] = { + static const uint16_t CalleeSavedRegsV3[] = { Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V1: + break; case HexagonSubtarget::V2: return CalleeSavedRegsV2; - 
break; case HexagonSubtarget::V3: case HexagonSubtarget::V4: return CalleeSavedRegsV3; - break; - default: - const char *ErrorString = - "Callee saved registers requested for unknown archtecture version"; - llvm_unreachable(ErrorString); } + llvm_unreachable("Callee saved registers requested for unknown architecture " + "version"); } BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) @@ -106,18 +103,16 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { }; switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V1: + break; case HexagonSubtarget::V2: return CalleeSavedRegClassesV2; - break; case HexagonSubtarget::V3: case HexagonSubtarget::V4: return CalleeSavedRegClassesV3; - break; - default: - const char *ErrorString = - "Callee saved register classes requested for unknown archtecture version"; - llvm_unreachable(ErrorString); } + llvm_unreachable("Callee saved register classes requested for unknown " + "architecture version"); } void HexagonRegisterInfo:: @@ -130,7 +125,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { // Hexagon_TODO: add code } else { - assert(0 && "Cannot handle this call frame pseudo instruction"); + llvm_unreachable("Cannot handle this call frame pseudo instruction"); } MBB.erase(I); } @@ -211,8 +206,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else if ((MI.getOpcode() == Hexagon::STriw) || (MI.getOpcode() == Hexagon::STrid) || (MI.getOpcode() == Hexagon::STrih) || - (MI.getOpcode() == Hexagon::STrib) || - (MI.getOpcode() == Hexagon::STriwt)) { + (MI.getOpcode() == Hexagon::STrib)) { // For stores, we need a reserved register. Change // memw(r30 + #10000) = r0 to: // @@ -310,13 +304,11 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> } unsigned HexagonRegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); - return 0; + llvm_unreachable("What is the exception register"); } unsigned HexagonRegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); - return 0; + llvm_unreachable("What is the exception handler register"); } #define GET_REGINFO_TARGET_DESC diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 33b0c14..fc65305 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -48,7 +48,7 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii); /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const TargetRegisterClass* const* getCalleeSavedRegClasses( const MachineFunction *MF = 0) const; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index c05f844..d44eae3 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -1,4 +1,4 @@ -//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===// +//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,46 +11,51 @@ // Declarations that describe the Hexagon register file. 
//===----------------------------------------------------------------------===// -class HexagonReg<string n> : Register<n> { - field bits<5> Num; - let Namespace = "Hexagon"; -} +let Namespace = "Hexagon" in { -class HexagonDoubleReg<string n, list<Register> subregs> : - RegisterWithSubRegs<n, subregs> { - field bits<5> Num; - let Namespace = "Hexagon"; -} + class HexagonReg<string n> : Register<n> { + field bits<5> Num; + } -// Registers are identified with 5-bit ID numbers. -// Ri - 32-bit integer registers. -class Ri<bits<5> num, string n> : HexagonReg<n> { - let Num = num; -} + class HexagonDoubleReg<string n, list<Register> subregs> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + } -// Rf - 32-bit floating-point registers. -class Rf<bits<5> num, string n> : HexagonReg<n> { - let Num = num; -} + // Registers are identified with 5-bit ID numbers. + // Ri - 32-bit integer registers. + class Ri<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } + // Rf - 32-bit floating-point registers. + class Rf<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } -// Rd - 64 bit registers. -class Rd<bits<5> num, string n, list<Register> subregs> : -HexagonDoubleReg<n, subregs> { - let Num = num; - let SubRegs = subregs; -} + // Rd - 64-bit registers. + class Rd<bits<5> num, string n, list<Register> subregs> : + HexagonDoubleReg<n, subregs> { + let Num = num; + let SubRegs = subregs; + } -class Rp<bits<5> num, string n> : HexagonReg<n> { - let Num = num; -} + // Rp - predicate registers + class Rp<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } -class Rc<bits<5> num, string n> : HexagonReg<n> { - let Num = num; -} + // Rc - control registers + class Rc<bits<5> num, string n> : HexagonReg<n> { + let Num = num; + } -let Namespace = "Hexagon" in { + // Rj - aliased integer registers + class Rj<string n, Ri R>: HexagonReg<n> { + let Num = R.Num; + let Aliases = [R]; + } def subreg_loreg : SubRegIndex; def subreg_hireg : SubRegIndex; @@ -89,17 +94,17 @@ let Namespace = "Hexagon" in { def R30 : Ri<30, "r30">, DwarfRegNum<[30]>; def R31 : Ri<31, "r31">, DwarfRegNum<[31]>; - - def PC : Ri<31, "r31">, DwarfRegNum<[32]>; - def GP : Ri<31, "r31">, DwarfRegNum<[33]>; + def SP : Rj<"sp", R29>, DwarfRegNum<[29]>; + def FP : Rj<"fp", R30>, DwarfRegNum<[30]>; + def LR : Rj<"lr", R31>, DwarfRegNum<[31]>; // Aliases of the R* registers used to hold 64-bit int values (doubles). - let SubRegIndices = [subreg_loreg, subreg_hireg] in { - def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; - def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; - def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; - def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; - def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; @@ -114,45 +119,38 @@ let Namespace = "Hexagon" in { } // Predicate registers. 
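A note on the Rd pair definitions above: CoveredBySubRegs = 1 records that the two 32-bit halves fully cover each 64-bit pair, i.e. D0 = r1:0 decomposes exactly into subreg_loreg = R0 and subreg_hireg = R1 (the property the peephole's subreg_loreg reasoning earlier in this patch relies on). In value terms (standalone sketch, plain C++):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t d0 = 0x1122334455667788ull;       // value held in r1:0
  uint32_t r0 = (uint32_t)d0;                // subreg_loreg
  uint32_t r1 = (uint32_t)(d0 >> 32);        // subreg_hireg
  assert((((uint64_t)r1 << 32) | r0) == d0); // the halves cover the pair
  return 0;
}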
- def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>; - def P1 : Rp< 0, "p1">, DwarfRegNum<[64]>; - def P2 : Rp< 0, "p2">, DwarfRegNum<[65]>; - def P3 : Rp< 0, "p3">, DwarfRegNum<[66]>; + def P0 : Rp<0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp<1, "p1">, DwarfRegNum<[64]>; + def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; + def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; // Control registers. def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>; - def LC0 : Rc<0, "lc0">, DwarfRegNum<[68]>; - - def SA1 : Rc<0, "sa1">, DwarfRegNum<[69]>; - def LC1 : Rc<0, "lc1">, DwarfRegNum<[70]>; -} - - - - - - - + def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>; + def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>; + def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>; + def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct? + def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct? +} // Register classes. // // FIXME: the register order should be defined in terms of the preferred // allocation order... // -def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9), - (sequence "R%u", 12, 28), - R10, R11, R29, R30, - R31)> { +def IntRegs : RegisterClass<"Hexagon", [i32], 32, + (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, R31)> { } -def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0, - 4), - (sequence "D%u", 6, 13), - D5, D14, D15)> { +def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, + (add (sequence "D%u", 0, 4), + (sequence "D%u", 6, 13), D5, D14, D15)> { let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)]; } @@ -162,8 +160,8 @@ def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> let Size = 32; } -def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1), - (sequence "SA%u", 0, 1), - PC)> { +def CRRegs : RegisterClass<"Hexagon", [i32], 32, + (add (sequence "LC%u", 0, 1), + (sequence "SA%u", 0, 1), PC, GP)> { let Size = 32; } diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 3ca257f..66a00e1 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -1,4 +1,4 @@ -//=- HexagonRemoveExtendArgs.cpp - Remove unecessary argument sign extends --=// +//===- HexagonRemoveExtendArgs.cpp - Remove unecessary argument sign extends =// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,12 @@ // //===----------------------------------------------------------------------===// - - -#include "llvm/Pass.h" +#include "HexagonTargetMachine.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "HexagonTargetMachine.h" -#include <iostream> +#include "llvm/Transforms/Scalar.h" using namespace llvm; namespace { diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index 427d1cb..fbea445 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -1,4 +1,4 @@ -//===-HexagonSchedule.td - Hexagon Scheduling Definitions -------*- C++ -*-===// +//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td index f21d928..d8feb89 100644 --- a/lib/Target/Hexagon/HexagonSelectCCInfo.td +++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td @@ -1,4 +1,4 @@ 
-//=-HexagoSelectCCInfo.td - Selectcc mappings ----------------*- tablegen -*-=// +//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -18,7 +18,7 @@ def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETNE)), - (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), + (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))), IntRegs:$tval, IntRegs:$fval))>; def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, @@ -35,24 +35,24 @@ def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETULT)), - (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1)))), IntRegs:$tval, IntRegs:$fval))>; def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETLT)), - (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1)))), IntRegs:$tval, IntRegs:$fval))>; def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETLE)), - (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), + (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))), IntRegs:$tval, IntRegs:$fval))>; def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETULE)), - (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), + (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))), IntRegs:$tval, IntRegs:$fval))>; @@ -86,7 +86,7 @@ def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval, IntRegs:$fval, SETEQ)), - (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), + (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))), IntRegs:$tval, IntRegs:$fval))>; diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 86fa026..0673e4d 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -1,4 +1,4 @@ -//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=// +//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index f4d3647..d10c9f2 100644 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -1,4 +1,4 @@ -//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===// +//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===// // // The LLVM Compiler Infrastructure // @@ -6,7 +6,7 @@ // License. See LICENSE.TXT for details. // // -//===----------------------------------------------------------------------===//// +//===----------------------------------------------------------------------===// // This pass tries to provide opportunities for better optimization of muxes. // The default code generated for something like: flag = (a == b) ? 
1 : 3; // would be: @@ -27,31 +27,24 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "xfer" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "HexagonTargetMachine.h" -#include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" -#include <map> -#include <iostream> - -#include "llvm/Support/CommandLine.h" -#define DEBUG_TYPE "xfer" - using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 83fb498..654d336 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -1,4 +1,4 @@ -//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===// +//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===// // // The LLVM Compiler Infrastructure // @@ -52,6 +52,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); + // Max issue per cycle == bundle width. + InstrItins.IssueWidth = 4; + if (EnableMemOps) UseMemOps = true; else diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 6de85df..3079086 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -1,4 +1,4 @@ -//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==// +//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index b29e92c..319eab2 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// // +// Implements the info about Hexagon target spec. 
// //===----------------------------------------------------------------------===// @@ -16,11 +17,10 @@ #include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" #include "llvm/PassManager.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include <iostream> using namespace llvm; @@ -56,7 +56,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") , - Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget), + Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { @@ -76,14 +76,38 @@ bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { return true; } -bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createHexagonRemoveExtendOps(*this)); - PM.add(createHexagonISelDag(*this)); +namespace { +/// Hexagon Code Generator Pass Configuration Options. +class HexagonPassConfig : public TargetPassConfig { +public: + HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + HexagonTargetMachine &getHexagonTargetMachine() const { + return getTM<HexagonTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { + return new HexagonPassConfig(this, PM); +} + +bool HexagonPassConfig::addInstSelector() { + PM.add(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + PM.add(createHexagonISelDag(getHexagonTargetMachine())); + PM.add(createHexagonPeephole()); return false; } -bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) { +bool HexagonPassConfig::addPreRegAlloc() { if (!DisableHardwareLoops) { PM.add(createHexagonHardwareLoops()); } @@ -91,28 +115,28 @@ bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) { return false; } -bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) { - PM.add(createHexagonCFGOptimizer(*this)); +bool HexagonPassConfig::addPostRegAlloc() { + PM.add(createHexagonCFGOptimizer(getHexagonTargetMachine())); return true; } -bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) { - PM.add(createIfConverterPass()); +bool HexagonPassConfig::addPreSched2() { + addPass(IfConverterID); return true; } -bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) { +bool HexagonPassConfig::addPreEmitPass() { if (!DisableHardwareLoops) { PM.add(createHexagonFixupHwLoops()); } // Expand Spill code for predicate registers. - PM.add(createHexagonExpandPredSpillCode(*this)); + PM.add(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); // Split up TFRcondsets into conditional transfers. 
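// A sketch of that split (hypothetical registers and immediates, not
// taken from the patch): a TFR_condset pseudo carries both arms of a
// conditional transfer, and this late pass rewrites it as two
// predicated moves once the predicate register is known:
//
//   r2 = TFR_condset_ii(p0, #1, #3)   -->   if (p0)  r2 = #1
//                                           if (!p0) r2 = #3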
- PM.add(createHexagonSplitTFRCondSets(*this)); + PM.add(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); return false; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index e27d3ae..70bea56 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -29,8 +29,8 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment. HexagonSubtarget Subtarget; - HexagonTargetLowering TLInfo; HexagonInstrInfo InstrInfo; + HexagonTargetLowering TLInfo; HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; @@ -72,11 +72,7 @@ public: // Pass Pipeline Configuration. virtual bool addPassesForOptimizations(PassManagerBase &PM); - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); - virtual bool addPreRegAlloc(llvm::PassManagerBase &PM); - virtual bool addPostRegAlloc(PassManagerBase &PM); - virtual bool addPreSched2(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; extern bool flag_aligned_memcpy; diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 188337d..d3ce5a6 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties ----*- C++ -*-===// +//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h index 101c1f2..6933450 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- HexagonTargetAsmInfo.h - Hexagon asm properties ---------*- C++ -*--==// +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h index 21b2d67..9305c27 100644 --- a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h +++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h @@ -1,4 +1,4 @@ -//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==// +//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h new file mode 100644 index 0000000..ed55c3c --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -0,0 +1,43 @@ +//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Hexagon target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. 
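One plausible consumer of these flag bits (a sketch under assumptions, not code from the patch; HexagonII::PredicatedPos and PredicatedMask are defined just below) shifts the packed MCInstrDesc::TSFlags word down to the field and masks it out:

    #include "MCTargetDesc/HexagonBaseInfo.h"
    #include "llvm/MC/MCInstrDesc.h"
    using namespace llvm;

    // True if the descriptor's TSFlags mark the instruction as predicated.
    static bool isPredicatedDesc(const MCInstrDesc &MCID) {
      return (MCID.TSFlags >> HexagonII::PredicatedPos) &
             HexagonII::PredicatedMask;
    }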
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBASEINFO_H +#define HEXAGONBASEINFO_H + +namespace llvm { + +/// HexagonII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace HexagonII { + + // *** The code below must match HexagonInstrFormat*.td *** // + + // MCInstrDesc TSFlags + enum { + + // Predicated instructions. + PredicatedPos = 1, + PredicatedMask = 0x1 + }; + + // *** The code above must match HexagonInstrFormat*.td *** // + +} // End namespace HexagonII. + +} // End namespace llvm. + +#endif diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 188693c..d6e6c36 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===// +//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index 8196e95..d336cd5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -1,4 +1,4 @@ -//===-- HexagonTargetAsmInfo.h - Hexagon asm properties ---------*- C++ -*--==// +//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 625f07c..74abc56 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions -----*- C++ -*-===// +//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===// // // The LLVM Compiler Infrastructure // @@ -18,6 +18,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile index c936e92..34bc68d 100644 --- a/lib/Target/Hexagon/Makefile +++ b/lib/Target/Hexagon/Makefile @@ -16,6 +16,7 @@ BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenAsmWriter.inc \ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ HexagonGenCallingConv.inc \ + HexagonGenDFAPacketizer.inc \ HexagonAsmPrinter.cpp DIRS = TargetInfo MCTargetDesc diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 97d311c..c1b003b 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -347,7 +347,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } llvm_unreachable("Implement any new match types added!"); - return true; } MBlazeOperand *MBlazeAsmParser:: diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 71095e5..bf1deef 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(MBlazeCodeGen MBlazeISelDAGToDAG.cpp MBlazeISelLowering.cpp MBlazeFrameLowering.cpp + MBlazeMachineFunction.cpp MBlazeRegisterInfo.cpp 
MBlazeSubtarget.cpp MBlazeTargetMachine.cpp diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index ccc3a05..adedf93 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -1,4 +1,4 @@ -//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===// +//===-- MBlazeDisassembler.cpp - Disassembler for MicroBlaze -------------===// // // The LLVM Compiler Infrastructure // @@ -492,7 +492,7 @@ static unsigned getOPCODE(uint32_t insn) { } } -EDInstInfo *MBlazeDisassembler::getEDInfo() const { +const EDInstInfo *MBlazeDisassembler::getEDInfo() const { return instInfoMBlaze; } diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h index 0ac0d89..5c4ae3b 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h @@ -1,4 +1,4 @@ -//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===// +//===-- MBlazeDisassembler.h - Disassembler for MicroBlaze -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,6 @@ #include "llvm/MC/MCDisassembler.h" -struct InternalInstruction; - namespace llvm { class MCInst; @@ -48,7 +46,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; }; } // namespace llvm diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 5297563..236583a 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===// +//= MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax -*- C++ -*-// // // The LLVM Compiler Infrastructure // @@ -21,8 +21,8 @@ namespace llvm { class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + MBlazeInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index 1245658..b4edff0 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -1,4 +1,4 @@ -//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===// +//===-- MBlaze.td - Describe the MBlaze Target Machine -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp index ad9247d..60a65bb 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp @@ -41,7 +41,6 @@ unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { default: llvm_unreachable("unknown mblaze machine relocation type"); } - return 0; } long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, @@ -54,7 +53,6 @@ long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, default: llvm_unreachable("unknown mblaze relocation type"); } - return 0; } unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const { @@ -104,8 +102,6 @@ long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelTy) 
const { if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL) return SymOffset - (RelOffset + 4); - else - assert(0 && "computeRelocation unknown for this relocation type"); - return 0; + llvm_unreachable("computeRelocation unknown for this relocation type"); } diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 37919bc..6531064 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -1,4 +1,4 @@ -//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====// +//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ---------------====// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 0002174..23c8e13 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -216,7 +216,7 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); + default: llvm_unreachable("Unexpected instr type to insert"); case MBlaze::ShiftRL: case MBlaze::ShiftRA: @@ -602,7 +602,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDValue MBlazeTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MicroBlaze."); - return SDValue(); // Not reached } SDValue MBlazeTargetLowering:: @@ -683,7 +682,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, /// TODO: isVarArg, isTailCall. SDValue MBlazeTargetLowering:: LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool &isTailCall, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -897,7 +896,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, if (VA.isRegLoc()) { MVT RegVT = VA.getLocVT(); ArgRegEnd = VA.getLocReg(); - TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC; if (RegVT == MVT::i32) RC = MBlaze::GPRRegisterClass; @@ -965,7 +964,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, StackPtr = DAG.getRegister(StackReg, getPointerTy()); // The last register argument that must be saved is MBlaze::R10 - TargetRegisterClass *RC = MBlaze::GPRRegisterClass; + const TargetRegisterClass *RC = MBlaze::GPRRegisterClass; unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5); unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1); @@ -1081,7 +1080,6 @@ getConstraintType(const std::string &Constraint) const case 'y': case 'f': return C_RegisterClass; - break; } } return TargetLowering::getConstraintType(Constraint); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 8b49bc3..168694b 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -134,7 +134,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index 
4acdcfd..3f14593 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===// +//===-- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index 3082a7e..91b69de 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===// +//===-- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 4c6034d..e40432a 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===// +//===-- MBlazeInstrFormats.td - MB Instruction defs --------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 7ae05b3..db71434 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===// +//===-- MBlazeInstrInfo.cpp - MBlaze Instruction Information --------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index 7174405..a309d2b 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -1,4 +1,4 @@ -//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===// +//===-- MBlazeInstrInfo.h - MBlaze Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 9fe2a49..02a2157 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===// +//===-- MBlazeInstrInfo.td - MBlaze Instruction defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 4c78deb..91aaf94 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -1,4 +1,4 @@ -//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===// +//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information -------------------===// // // The LLVM Compiler Infrastructure // @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" #include <cstring> using namespace llvm; @@ -73,16 +74,13 @@ lookupGCCName(const char *Name) const { } bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { - // Overload Table - const bool OTable[] = { + if (IntrID == 0) + return false; + + unsigned id = IntrID - Intrinsic::num_intrinsics + 1; #define GET_INTRINSIC_OVERLOAD_TABLE #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_OVERLOAD_TABLE - }; - if (IntrID == 0) - return false; - else - return OTable[IntrID - 
Intrinsic::num_intrinsics]; } /// This defines the "getAttributes(ID id)" method. diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h index 80760d8..34f3792 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -1,4 +1,4 @@ -//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===// +//===-- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td index 278afbe..b5dc595 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsics.td +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -1,4 +1,4 @@ -//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===// +//===-- IntrinsicsMBlaze.td - Defines MBlaze intrinsics ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp index 7e5598f..6b9f42e 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp +++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp @@ -85,9 +85,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { MCSymbol *MBlazeMCInstLower:: GetBlockAddressSymbol(const MachineOperand &MO) const { switch (MO.getTargetFlags()) { - default: - assert(0 && "Unknown target flag on GV operand"); - + default: llvm_unreachable("Unknown target flag on GV operand"); case 0: break; } @@ -150,7 +148,7 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_BlockAddress: MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); break; - case MachineOperand::MO_FPImmediate: + case MachineOperand::MO_FPImmediate: { bool ignored; APFloat FVal = MO.getFPImm()->getValueAPF(); FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored); @@ -160,6 +158,9 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::CreateImm(Val); break; } + case MachineOperand::MO_RegisterMask: + continue; + } OutMI.addOperand(MCOp); } diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h index 92196f2..bb77ed4 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.h +++ b/lib/Target/MBlaze/MBlazeMCInstLower.h @@ -1,4 +1,4 @@ -//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===// +//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.cpp b/lib/Target/MBlaze/MBlazeMachineFunction.cpp new file mode 100644 index 0000000..2217b54 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMachineFunction.cpp @@ -0,0 +1,14 @@ +//===-- MBlazeMachineFunctionInfo.cpp - Private data ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
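The new translation unit below exists only to give MBlazeFunctionInfo an out-of-line virtual method: defining anchor() in one .cpp pins the class's vtable and type info to a single object file instead of emitting weak duplicates in every unit that includes the header. A minimal sketch of the idiom, with a hypothetical class name:

    struct Demo {             // hypothetical stand-in for MBlazeFunctionInfo
      virtual void anchor();  // declared, but deliberately not defined inline
    };
    void Demo::anchor() {}    // defined out of line exactly once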
+// +//===----------------------------------------------------------------------===// + +#include "MBlazeMachineFunction.h" + +using namespace llvm; + +void MBlazeFunctionInfo::anchor() { } diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h index df39509..95cc507 100644 --- a/lib/Target/MBlaze/MBlazeMachineFunction.h +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -1,4 +1,4 @@ -//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=// +//===-- MBlazeMachineFunctionInfo.h - Private data --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,8 +24,8 @@ namespace llvm { /// MBlazeFunctionInfo - This class is derived from MachineFunction private /// MBlaze target-specific information for each MachineFunction. class MBlazeFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); -private: /// Holds for each function where on the stack the Frame Pointer must be /// saved. This is used on Prologue and Epilogue to emit FP save/restore int FPStackOffset; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 9788ba9..6801a1a 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===// +//===-- MBlazeRegisterInfo.cpp - MBlaze Register Information --------------===// // // The LLVM Compiler Infrastructure // @@ -54,10 +54,10 @@ unsigned MBlazeRegisterInfo::getPICCallReg() { //===----------------------------------------------------------------------===// /// MBlaze Callee Saved Registers -const unsigned* MBlazeRegisterInfo:: +const uint16_t* MBlazeRegisterInfo:: getCalleeSavedRegs(const MachineFunction *MF) const { // MBlaze callee-save register range is R20 - R31 - static const unsigned CalleeSavedRegs[] = { + static const uint16_t CalleeSavedRegs[] = { MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31, @@ -184,10 +184,8 @@ unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned MBlazeRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); - return 0; } unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); - return 0; } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 7e4b269..1d51162 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -1,4 +1,4 @@ -//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===// +//===-- MBlazeRegisterInfo.h - MBlaze Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -46,7 +46,7 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { static unsigned getPICCallReg(); /// Code Generation virtual methods... 
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index 13c46ba..64cae5c 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===// +//===-- MBlazeRegisterInfo.td - MBlaze Register defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeRelocations.h b/lib/Target/MBlaze/MBlazeRelocations.h index c298eda..6387ee2 100644 --- a/lib/Target/MBlaze/MBlazeRelocations.h +++ b/lib/Target/MBlaze/MBlazeRelocations.h @@ -1,4 +1,4 @@ -//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===// +//===-- MBlazeRelocations.h - MBlaze Code Relocations -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 4662f25..4a3ae5f 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -1,4 +1,4 @@ -//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===// +//===-- MBlazeSchedule.td - MBlaze Scheduling Definitions --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td index ccbf99d..20257a6 100644 --- a/lib/Target/MBlaze/MBlazeSchedule3.td +++ b/lib/Target/MBlaze/MBlazeSchedule3.td @@ -1,4 +1,4 @@ -//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +//===-- MBlazeSchedule3.td - MBlaze Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td index fa88766..ab53b42 100644 --- a/lib/Target/MBlaze/MBlazeSchedule5.td +++ b/lib/Target/MBlaze/MBlazeSchedule5.td @@ -1,4 +1,4 @@ -//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +//===-- MBlazeSchedule5.td - MBlaze Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index 7e5667f..d12d142 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -1,4 +1,4 @@ -//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===// +//===-- MBlazeSubtarget.cpp - MBlaze Subtarget Information ----------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h index 43b0197..eb37504 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.h +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -1,4 +1,4 @@ -//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====// +//===-- MBlazeSubtarget.h - Define Subtarget for the MBlaze ----*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 5ed81dd..5c07424 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -45,17 +45,37 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrItins(Subtarget.getInstrItineraryData()) { 
} +namespace { +/// MBlaze Code Generator Pass Configuration Options. +class MBlazePassConfig : public TargetPassConfig { +public: + MBlazePassConfig(MBlazeTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + MBlazeTargetMachine &getMBlazeTargetMachine() const { + return getTM<MBlazeTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) { + return new MBlazePassConfig(this, PM); +} + // Install an instruction selector pass using // the ISelDag to gen MBlaze code. -bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createMBlazeISelDag(*this)); +bool MBlazePassConfig::addInstSelector() { + PM.add(createMBlazeISelDag(getMBlazeTargetMachine())); return false; } // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. -bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM) { - PM.add(createMBlazeDelaySlotFillerPass(*this)); +bool MBlazePassConfig::addPreEmitPass() { + PM.add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine())); return true; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 036f1b6..1647a21 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -1,4 +1,4 @@ -//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===// +//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -79,8 +79,7 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt index 6fa7f43..36134a6 100644 --- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMMBlazeDesc MBlazeMCAsmInfo.cpp MBlazeMCCodeEmitter.cpp MBlazeMCTargetDesc.cpp + MBlazeELFObjectWriter.cpp ) add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index d5acbe9..f383fec 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -27,7 +27,7 @@ using namespace llvm; static unsigned getFixupKindSize(unsigned Kind) { switch (Kind) { - default: assert(0 && "invalid fixup kind!"); + default: llvm_unreachable("invalid fixup kind!"); case FK_Data_1: return 1; case FK_PCRel_2: case FK_Data_2: return 2; @@ -39,12 +39,6 @@ static unsigned getFixupKindSize(unsigned Kind) { namespace { -class MBlazeELFObjectWriter : public MCELFObjectTargetWriter { -public: - MBlazeELFObjectWriter(Triple::OSType OSType) - : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE, - /*HasRelocationAddend*/ true) {} -}; class MBlazeAsmBackend : public MCAsmBackend { public: @@ -56,16 +50,16 @@ public: return 2; } - bool MayNeedRelaxation(const MCInst &Inst) const; + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, 
const MCAsmLayout &Layout) const; - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; unsigned getPointerSize() const { return 4; @@ -81,7 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } -bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { +bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) return false; @@ -104,12 +98,12 @@ bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, return int64_t(Value) != int64_t(int8_t(Value)); } -void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { +void MBlazeAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res = Inst; Res.setOpcode(getRelaxedOpcode(Inst.getOpcode())); } -bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { +bool MBlazeAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { if ((Count % 4) != 0) return false; @@ -123,20 +117,19 @@ bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { namespace { class ELFMBlazeAsmBackend : public MBlazeAsmBackend { public: - Triple::OSType OSType; - ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType) - : MBlazeAsmBackend(T), OSType(_OSType) { } + uint8_t OSABI; + ELFMBlazeAsmBackend(const Target &T, uint8_t _OSABI) + : MBlazeAsmBackend(T), OSABI(_OSABI) { } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS, - /*IsLittleEndian*/ false); + return createMBlazeELFObjectWriter(OS, OSABI); } }; -void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void ELFMBlazeAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned Size = getFixupKindSize(Fixup.getKind()); @@ -172,5 +165,6 @@ MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on MBlaze"); - return new ELFMBlazeAsmBackend(T, TheTriple.getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + return new ELFMBlazeAsmBackend(T, OSABI); } diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h index c8bdd6f..437026e 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h @@ -96,7 +96,6 @@ static inline bool isSpecialMBlazeRegister(unsigned Reg) { default: return false; } - return false; // Not reached } /// getMBlazeRegisterNumbering - Given the enum value for some register, e.g. @@ -161,7 +160,6 @@ static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) { case MBlaze::RPVR11 : return 0x200B; default: llvm_unreachable("Unknown register number!"); } - return 0; // Not reached } /// getRegisterFromNumbering - Given the enum value for some register, e.g. 
@@ -202,7 +200,6 @@ static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) { case 31 : return MBlaze::R31; default: llvm_unreachable("Unknown register number!"); } - return 0; // Not reached } static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) { @@ -233,7 +230,6 @@ static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) { case 0x200B : return MBlaze::RPVR11; default: llvm_unreachable("Unknown register number!"); } - return 0; // Not reached } } // end namespace llvm; diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp new file mode 100644 index 0000000..2824b3c --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp @@ -0,0 +1,77 @@ +//===-- MBlazeELFObjectWriter.cpp - MBlaze ELF Writer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MBlazeMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + class MBlazeELFObjectWriter : public MCELFObjectTargetWriter { + public: + MBlazeELFObjectWriter(uint8_t OSABI); + + virtual ~MBlazeELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + }; +} + +MBlazeELFObjectWriter::MBlazeELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MBLAZE, + /*HasRelocationAddend*/ false) {} + +MBlazeELFObjectWriter::~MBlazeELFObjectWriter() { +} + +unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + // determine the type of the relocation + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented"); + case FK_PCRel_4: + Type = ELF::R_MICROBLAZE_64_PCREL; + break; + case FK_PCRel_2: + Type = ELF::R_MICROBLAZE_32_PCREL; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + Type = ((IsRelocWithSymbol || Addend !=0) + ? 
ELF::R_MICROBLAZE_32 + : ELF::R_MICROBLAZE_64); + break; + case FK_Data_2: + Type = ELF::R_MICROBLAZE_32; + break; + } + } + return Type; +} + + + +MCObjectWriter *llvm::createMBlazeELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new MBlazeELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/ false); +} diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp index 0d88466..8231f07 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp @@ -14,6 +14,8 @@ #include "MBlazeMCAsmInfo.h" using namespace llvm; +void MBlazeMCAsmInfo::anchor() { } + MBlazeMCAsmInfo::MBlazeMCAsmInfo() { IsLittleEndian = false; StackGrowsUp = false; diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h index e68dd58..36bf655 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====// +//===-- MBlazeMCAsmInfo.h - MBlaze asm properties --------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,7 @@ namespace llvm { class Target; class MBlazeMCAsmInfo : public MCAsmInfo { + virtual void anchor(); public: explicit MBlazeMCAsmInfo(); }; diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp index 1514557..c9b1636 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp @@ -43,7 +43,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI) const; + uint64_t getBinaryCodeForInstr(const MCInst &MI) const; /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. @@ -54,8 +54,8 @@ public: static unsigned GetMBlazeRegNum(const MCOperand &MO) { // FIXME: getMBlazeRegisterNumbering() is sufficient? - assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented."); - return 0; + llvm_unreachable("MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet " + "implemented."); } void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { @@ -109,17 +109,14 @@ unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO) const { if (MO.isReg()) return getMBlazeRegisterNumbering(MO.getReg()); - else if (MO.isImm()) + if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); - else if (MO.isExpr()) - return 0; // The relocation has already been recorded at this point. - else { + if (MO.isExpr()) + return 0; // The relocation has already been recorded at this point. 
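// (A sketch of the convention, not text from the patch:) the three cases
// above follow the usual MC operand-encoding contract: a register encodes
// as its target register number, an immediate as its raw value, and an
// expression as 0 because the fixup recorded earlier is resolved at
// layout time; any other operand kind is a lowering bug, hence the
// unreachable path below.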
#ifndef NDEBUG - errs() << MO; + errs() << MO; #endif - llvm_unreachable(0); - } - return 0; + llvm_unreachable(0); } void MBlazeMCCodeEmitter:: diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index a3a5cf4..5da0aa7 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===// +//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions ---------------===// // // The LLVM Compiler Infrastructure // @@ -83,12 +83,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSDarwin()) { llvm_unreachable("MBlaze does not support Darwin MACH-O format"); - return NULL; } if (TheTriple.isOSWindows()) { llvm_unreachable("MBlaze does not support Windows COFF format"); - return NULL; } return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); @@ -97,9 +95,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI); + return new MBlazeInstPrinter(MAI, MRI); return 0; } diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index deff5cb..088d163 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -14,24 +14,29 @@ #ifndef MBLAZEMCTARGETDESC_H #define MBLAZEMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class MCAsmBackend; class MCContext; class MCCodeEmitter; class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; class Target; class StringRef; class formatted_raw_ostream; +class raw_ostream; extern Target TheMBlazeTarget; MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); - + MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT); +MCObjectWriter *createMBlazeELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace // Defines symbolic names for MBlaze registers. 
This defines a mapping from diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 7daa7a2..a8f9b52 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_target(MSP430CodeGen MSP430ISelLowering.cpp MSP430InstrInfo.cpp MSP430FrameLowering.cpp + MSP430MachineFunctionInfo.cpp MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index 5d6c6ad..0930c45 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -92,7 +92,6 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo, switch (CC) { default: llvm_unreachable("Unsupported CC code"); - break; case MSP430CC::COND_E: O << "eq"; break; diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index a1984a8..3fd7ce0 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -1,4 +1,4 @@ -//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===// +//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-// // // The LLVM Compiler Infrastructure // @@ -21,8 +21,8 @@ namespace llvm { class MSP430InstPrinter : public MCInstPrinter { public: - MSP430InstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + MSP430InstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index ad7d380..5e5f3d8 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -14,6 +14,8 @@ #include "MSP430MCAsmInfo.h" using namespace llvm; +void MSP430MCAsmInfo::anchor() { } + MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) { PointerSize = 2; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index f3138a2..690fc19 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====// +//===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -20,7 +20,9 @@ namespace llvm { class Target; - struct MSP430MCAsmInfo : public MCAsmInfo { + class MSP430MCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit MSP430MCAsmInfo(const Target &T, StringRef TT); }; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 0d532e3..8545055 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===// +//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===// // // The LLVM Compiler Infrastructure // @@ -61,9 +61,10 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, unsigned 
SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI); + return new MSP430InstPrinter(MAI, MRI); return 0; } diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td index 5cc5e6e..c6796b3 100644 --- a/lib/Target/MSP430/MSP430.td +++ b/lib/Target/MSP430/MSP430.td @@ -1,4 +1,4 @@ -//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==// +//===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 8836549..1d1094b 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -65,7 +65,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { - default: assert(0 && "Not implemented yet!"); + default: llvm_unreachable("Not implemented yet!"); case MachineOperand::MO_Register: O << MSP430InstPrinter::getRegisterName(MO.getReg()); return; diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index bd64443..bdeb0c5 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -1,4 +1,4 @@ -//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=// +//===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index e406ff2..61d7f2b 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -1,4 +1,4 @@ -//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========// +//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 884d69b..071a2f7 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/VectorExtras.h" using namespace llvm; typedef enum { @@ -195,7 +194,6 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); default: llvm_unreachable("unimplemented operand"); - return SDValue(); } } @@ -261,19 +259,16 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, case CallingConv::Fast: return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: - if (Ins.empty()) - return Chain; - else { + if (Ins.empty()) + return Chain; report_fatal_error("ISRs cannot have arguments"); - return SDValue(); - } } } SDValue MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -291,7 +286,6 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Outs, OutVals, Ins, dl, DAG, InVals); case 
CallingConv::MSP430_INTR: report_fatal_error("ISRs cannot be called directly"); - return SDValue(); } } @@ -392,10 +386,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, SmallVector<CCValAssign, 16> RVLocs; // ISRs cannot return any value. - if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { + if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) report_fatal_error("ISRs cannot return any value"); - return SDValue(); - } // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), @@ -601,8 +593,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, // Expand non-constant shifts to loops: if (!isa<ConstantSDNode>(N->getOperand(1))) switch (Opc) { - default: - assert(0 && "Invalid shift opcode!"); + default: llvm_unreachable("Invalid shift opcode!"); case ISD::SHL: return DAG.getNode(MSP430ISD::SHL, dl, VT, N->getOperand(0), N->getOperand(1)); @@ -653,7 +644,7 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; + return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); } SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, @@ -662,7 +653,7 @@ SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; + return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); } static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, @@ -1030,8 +1021,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, unsigned Opc; const TargetRegisterClass * RC; switch (MI->getOpcode()) { - default: - assert(0 && "Invalid shift opcode!"); + default: llvm_unreachable("Invalid shift opcode!"); case MSP430::Shl8: Opc = MSP430::SHL8r1; RC = MSP430::GR8RegisterClass; diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 237f604..e372f00 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -1,4 +1,4 @@ -//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==// +//===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -152,8 +152,8 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td index 73aef1f..a9e87da 100644 --- a/lib/Target/MSP430/MSP430InstrFormats.td +++ b/lib/Target/MSP430/MSP430InstrFormats.td @@ -1,4 +1,4 @@ -//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===// +//===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp 
b/lib/Target/MSP430/MSP430InstrInfo.cpp index 9d3c7e9..fd5de34 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===// +//===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -130,9 +130,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm()); switch (CC) { - default: - assert(0 && "Invalid branch condition!"); - break; + default: llvm_unreachable("Invalid branch condition!"); case MSP430CC::COND_E: CC = MSP430CC::COND_NE; break; @@ -297,8 +295,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { switch (Desc.TSFlags & MSP430II::SizeMask) { default: switch (Desc.getOpcode()) { - default: - assert(0 && "Unknown instruction size!"); + default: llvm_unreachable("Unknown instruction size!"); case TargetOpcode::PROLOG_LABEL: case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: @@ -314,8 +311,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } case MSP430II::SizeSpecial: switch (MI->getOpcode()) { - default: - assert(0 && "Unknown instruction size!"); + default: llvm_unreachable("Unknown instruction size!"); case MSP430::SAR8r1c: case MSP430::SAR16r1c: return 4; @@ -327,6 +323,4 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case MSP430II::Size6Bytes: return 6; } - - return 6; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 90013f5..fe2a75c 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -1,4 +1,4 @@ -//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===// +//===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 59cb598..4348dd5 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -1,4 +1,4 @@ -//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===// +//===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index d1d9a11..b1773fb 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===// +//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===// // // The LLVM Compiler Infrastructure // @@ -39,7 +39,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetExternalSymbolSymbol(const MachineOperand &MO) const { switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); + default: llvm_unreachable("Unknown target flag on GV operand"); case 0: break; } @@ -81,7 +81,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetBlockAddressSymbol(const MachineOperand &MO) const { switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); + default: llvm_unreachable("Unknown target flag on GV operand"); case 
0: break; } @@ -116,7 +116,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { switch (MO.getType()) { default: MI->dump(); - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) continue; @@ -143,6 +143,9 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_BlockAddress: MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); + break; + case MachineOperand::MO_RegisterMask: + continue; } OutMI.addOperand(MCOp); diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h index e937696..297efd2 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.h +++ b/lib/Target/MSP430/MSP430MCInstLower.h @@ -1,4 +1,4 @@ -//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===// +//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp new file mode 100644 index 0000000..0f75399 --- /dev/null +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MSP430MachineFunctionInfo.h" + +using namespace llvm; + +void MSP430MachineFunctionInfo::anchor() { } diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index 383fd2e..632d6de 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -21,6 +21,8 @@ namespace llvm { /// MSP430MachineFunctionInfo - This class is derived from MachineFunction and /// contains private MSP430 target-specific information for each MachineFunction. class MSP430MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes.
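The empty anchor() definitions threaded through this patch (MSP430MCAsmInfo and MSP430MachineFunctionInfo above, MSP430Subtarget and the Mips classes below) are LLVM's vtable-anchoring idiom: a class with virtual functions declares one trivial virtual method and defines it out-of-line, so the compiler emits the vtable and type info once, in that .cpp file, rather than weakly in every translation unit that includes the header. A minimal sketch of the pattern, with hypothetical names:

// Widget.h
class Widget {
  virtual void anchor();            // never called; exists only to pin the vtable
public:
  virtual int id() const { return 0; }
};

// Widget.cpp becomes the single home of Widget's vtable and RTTI.
void Widget::anchor() {}

That is also why the new MSP430MachineFunctionInfo.cpp above contains nothing but an empty anchor() body.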
unsigned CalleeSavedFrameSize; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 9049c4b..f9ddfb3 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===// +//===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===// // // The LLVM Compiler Infrastructure // @@ -38,27 +38,27 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, StackAlign = TM.getFrameLowering()->getStackAlignment(); } -const unsigned* +const uint16_t* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering(); const Function* F = MF->getFunction(); - static const unsigned CalleeSavedRegs[] = { + static const uint16_t CalleeSavedRegs[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; - static const unsigned CalleeSavedRegsFP[] = { + static const uint16_t CalleeSavedRegsFP[] = { MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; - static const unsigned CalleeSavedRegsIntr[] = { + static const uint16_t CalleeSavedRegsIntr[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, 0 }; - static const unsigned CalleeSavedRegsIntrFP[] = { + static const uint16_t CalleeSavedRegsIntrFP[] = { MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 10a3d53..82ee499 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -1,4 +1,4 @@ -//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===// +//===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -36,18 +36,11 @@ public: MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii); /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; - const TargetRegisterClass * - getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const { - // No sub-classes makes this really easy. 
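Shrinking these callee-saved tables from unsigned to uint16_t above is safe because target register enum values easily fit in 16 bits, and it halves the static data for each calling-convention/frame-pointer variant. The lists stay 0-terminated rather than length-prefixed, so consumers scan to the sentinel. A self-contained sketch of that convention (the register numbers here are made up):

#include <cstdint>
#include <cstdio>

// A 0-terminated callee-saved list, in the style of getCalleeSavedRegs().
static const uint16_t CalleeSaved[] = { 4, 5, 6, 7, 10, 0 };

int main() {
  for (const uint16_t *R = CalleeSaved; *R != 0; ++R)  // stop at the sentinel
    std::printf("spill/restore register #%d\n", (int)*R);
  return 0;
}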
- return A; - } - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index d1c2e3f..3f2eb8c 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -1,4 +1,4 @@ -//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===// +//===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 3ee14d9..edeaf34 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -1,4 +1,4 @@ -//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=// +//===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,8 @@ using namespace llvm; +void MSP430Subtarget::anchor() { } + MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU, const std::string &FS) : diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 1ce5f11..e7bebbd 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -1,4 +1,4 @@ -//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===// +//===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -25,6 +25,7 @@ namespace llvm { class StringRef; class MSP430Subtarget : public MSP430GenSubtargetInfo { + virtual void anchor(); bool ExtendedInsts; public: /// This constructor initializes the data members to match that diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index a0fc3da..af62e48 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -38,14 +38,33 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { } +namespace { +/// MSP430 Code Generator Pass Configuration Options. +class MSP430PassConfig : public TargetPassConfig { +public: + MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} -bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM) { + MSP430TargetMachine &getMSP430TargetMachine() const { + return getTM<MSP430TargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { + return new MSP430PassConfig(this, PM); +} + +bool MSP430PassConfig::addInstSelector() { // Install an instruction selector. - PM.add(createMSP430ISelDag(*this, getOptLevel())); + PM.add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel())); return false; } -bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM) { +bool MSP430PassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. 
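The MSP430TargetMachine hunk above is this target's half of the tree-wide TargetPassConfig refactoring: addInstSelector and addPreEmitPass no longer live on the TargetMachine and take a PassManagerBase; instead the target machine hands back a small pass-configuration object and the hooks are overridden there. Reduced to its shape, modeled directly on the MSP430 code above (the MyTarget names are placeholders):

namespace {
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(MyTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  virtual bool addInstSelector() {
    // Install the DAG instruction selector; returning false means "no error".
    PM.add(createMyTargetISelDag(getTM<MyTargetMachine>(), getOptLevel()));
    return false;
  }
};
} // namespace

TargetPassConfig *MyTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new MyTargetPassConfig(this, PM);  // caller takes ownership
}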
PM.add(createMSP430BranchSelectionPass()); return false; diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 28d482a..f54146b 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -1,4 +1,4 @@ -//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==// +//===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -62,8 +62,7 @@ public: return &TSInfo; } - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. } // end namespace llvm diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..ac21c25 --- /dev/null +++ b/lib/Target/Mips/AsmParser/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMipsAsmParser + MipsAsmParser.cpp + ) + diff --git a/lib/Target/Mips/AsmParser/LLVMBuild.txt b/lib/Target/Mips/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..e7ca243 --- /dev/null +++ b/lib/Target/Mips/AsmParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Mips/AsmParser/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MipsAsmParser +parent = Mips +required_libraries = MC MCParser Support MipsDesc MipsInfo +add_to_library_groups = Mips diff --git a/lib/Target/Mips/AsmParser/Makefile b/lib/Target/Mips/AsmParser/Makefile new file mode 100644 index 0000000..679acee --- /dev/null +++ b/lib/Target/Mips/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Mips/AsmParser/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsAsmParser + +# Hack: we need to include 'main' mips target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp new file mode 100644 index 0000000..58b5590 --- /dev/null +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -0,0 +1,66 @@ +//===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +namespace { +class MipsAsmParser : public MCTargetAsmParser { + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); + + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + + bool ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + bool ParseDirective(AsmToken DirectiveID); + +public: + MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + : MCTargetAsmParser() { + } + +}; +} + +bool MipsAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out) { + return true; +} + +bool MipsAsmParser:: +ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + return true; +} + +bool MipsAsmParser:: +ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + return true; +} + +bool MipsAsmParser:: +ParseDirective(AsmToken DirectiveID) { + return true; +} + +extern "C" void LLVMInitializeMipsAsmParser() { + RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget); + RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget); + RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target); + RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget); +} diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index a13c0e8..13d17e4 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen + MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp @@ -22,6 +23,7 @@ add_llvm_target(MipsCodeGen MipsISelLowering.cpp MipsFrameLowering.cpp MipsMCInstLower.cpp + MipsMachineFunction.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp @@ -32,3 +34,4 @@ add_llvm_target(MipsCodeGen add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) +add_subdirectory(AsmParser) diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile index 74872a4..f07f3ed 100644 --- a/lib/Target/Mips/InstPrinter/Makefile +++ b/lib/Target/Mips/InstPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===## +##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 3e9c46a..2917a89 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -92,26 +92,26 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { MCSymbolRefExpr::VariantKind Kind = SRE->getKind(); switch (Kind) { - default: assert(0 && "Invalid kind!"); - case MCSymbolRefExpr::VK_None: break; - case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; - case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break; - case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; - case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; 
- case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; - case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; - case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break; - case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break; - case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break; - case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; - case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; - case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; - case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; - case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; - case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; + default: llvm_unreachable("Invalid kind!"); + case MCSymbolRefExpr::VK_None: break; + case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; + case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break; + case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; + case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI: OS << "%dtprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO: OS << "%dtprel_lo("; break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; } OS << SRE->getSymbol(); @@ -136,12 +136,12 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printRegName(O, Op.getReg()); return; } - + if (Op.isImm()) { O << Op.getImm(); return; } - + assert(Op.isExpr() && "unknown operand kind in printOperand"); printExpr(Op.getExpr(), O); } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 5c11165..acd761d 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===// +//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -18,7 +18,7 @@ namespace llvm { // These enumeration declarations were orignally in MipsInstrInfo.h but // had to be moved here to avoid circular dependencies between -// LLVMMipsCodeGen and LLVMMipsAsmPrinter. +// LLVMMipsCodeGen and LLVMMipsAsmPrinter. 
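The new MipsAsmParser earlier in the patch is deliberately a skeleton: in the MC parser interface a true return means "failed / not handled", so every method declines, but the extern "C" initializer is enough for the target registry to route llvm-mc's mips, mipsel, mips64 and mips64el triples to the backend; real parsing arrives in later commits. The registration shape, condensed (not compilable as-is, since a real subclass must implement the full MCTargetAsmParser interface, exactly as the new file above does):

namespace {
class StubAsmParser : public MCTargetAsmParser {
  bool ParseDirective(AsmToken DirectiveID) {
    return true;  // true = not handled; MC diagnoses the failure upstream
  }
  // ...the remaining MCTargetAsmParser overrides, likewise returning true...
};
} // namespace

extern "C" void LLVMInitializeStubAsmParser() {
  RegisterMCAsmParser<StubAsmParser> X(TheStubTarget);  // one per target handle
}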
namespace Mips { // Mips Branch Codes enum FPBranchCode { @@ -77,17 +77,18 @@ class TargetMachine; class MipsInstPrinter : public MCInstPrinter { public: - MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - + MipsInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) : + MCInstPrinter(MAI, MRI) {} + // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getInstructionName(unsigned Opcode); static const char *getRegisterName(unsigned RegNo); - + virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - + private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index bcd32bc..abbed8c 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -16,12 +16,13 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = Mips parent = Target +has_asmparser = 1 has_asmprinter = 1 has_jit = 1 diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index 0eb0a55..fa23150 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMMipsDesc MipsMCAsmInfo.cpp MipsMCCodeEmitter.cpp MipsMCTargetDesc.cpp + MipsELFObjectWriter.cpp ) add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 60ff4fe..d69570b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -1,4 +1,4 @@ -//===-- MipsASMBackend.cpp - ---------===// +//===-- MipsASMBackend.cpp - Mips Asm Backend ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -69,23 +69,22 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } namespace { +class MipsAsmBackend : public MCAsmBackend { + Triple::OSType OSType; + bool IsLittle; // Big or little endian -class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, - bool HasRelocationAddend) - : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, - HasRelocationAddend) {} -}; + MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle) : + MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle) {} -class MipsAsmBackend : public MCAsmBackend { -public: - MipsAsmBackend(const Target &T) : MCAsmBackend() {} + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createMipsELFObjectWriter(OS, OSType, IsLittle); + } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. 
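Merging MipsEB_AsmBackend and MipsEL_AsmBackend into one MipsAsmBackend carrying an IsLittle flag (this hunk and the next) turns byte order into plain data, so a single applyFixup can patch instruction words either way. The essential move, as a standalone sketch (the real applyFixup additionally shifts and masks Value per fixup kind via adjustFixupValue):

#include <cstdint>

// Patch a 4-byte fixup value into the encoded instruction bytes at Offset,
// honoring the target's byte order.
static void writeFixup32(char *Data, unsigned Offset, uint32_t Value,
                         bool IsLittle) {
  for (unsigned i = 0; i != 4; ++i) {
    unsigned ByteIdx = IsLittle ? i : 3 - i;  // mirror the order for big-endian
    Data[Offset + i] = char(Value >> (ByteIdx * 8));
  }
}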
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { MCFixupKind Kind = Fixup.getKind(); Value = adjustFixupValue((unsigned)Kind, Value); @@ -141,6 +140,9 @@ public: { "fixup_Mips_GOTTPREL", 0, 16, 0 }, { "fixup_Mips_TPREL_HI", 0, 16, 0 }, { "fixup_Mips_TPREL_LO", 0, 16, 0 }, + { "fixup_Mips_TLSLDM", 0, 16, 0 }, + { "fixup_Mips_DTPREL_HI", 0, 16, 0 }, + { "fixup_Mips_DTPREL_LO", 0, 16, 0 }, { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel } }; @@ -159,7 +161,7 @@ public: /// relaxation. /// /// \param Inst - The instruction to test. - bool MayNeedRelaxation(const MCInst &Inst) const { + bool mayNeedRelaxation(const MCInst &Inst) const { return false; } @@ -180,9 +182,9 @@ public: /// \param Inst - The instruction to relax, which may be the same /// as the output. /// \parm Res [output] - On return, the relaxed instruction. - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + void relaxInstruction(const MCInst &Inst, MCInst &Res) const { } - + /// @} /// WriteNopData - Write an (optimal) nop sequence of Count bytes @@ -190,50 +192,20 @@ public: /// it should return an error. /// /// \return - True on success. - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } }; -class MipsEB_AsmBackend : public MipsAsmBackend { -public: - Triple::OSType OSType; - - MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType) - : MipsAsmBackend(T), OSType(_OSType) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(createELFObjectTargetWriter(), - OS, /*IsLittleEndian*/ false); - } - - MCELFObjectTargetWriter *createELFObjectTargetWriter() const { - return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); - } -}; - -class MipsEL_AsmBackend : public MipsAsmBackend { -public: - Triple::OSType OSType; - - MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType) - : MipsAsmBackend(T), OSType(_OSType) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(createELFObjectTargetWriter(), - OS, /*IsLittleEndian*/ true); - } - - MCELFObjectTargetWriter *createELFObjectTargetWriter() const { - return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); - } -}; } // namespace -MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) { - Triple TheTriple(TT); +// MCAsmBackend +MCAsmBackend *llvm::createMipsAsmBackendEL(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/true); +} - // just return little endian for now - // - return new MipsEL_AsmBackend(T, Triple(TT).getOS()); +MCAsmBackend *llvm::createMipsAsmBackendEB(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/false); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 00fc5df..34e3a6e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -1,4 +1,4 @@ -//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===// +//===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -191,13 +191,12 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) case Mips::HWR29: return 29; case Mips::FP: case Mips::FP_64: 
case Mips::F30: case Mips::D30_64: - case Mips::D15: + case Mips::D15: return 30; case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: return 31; default: llvm_unreachable("Unknown register number!"); } - return 0; // Not reached } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp new file mode 100644 index 0000000..076a6a8 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -0,0 +1,136 @@ +//===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/MipsFixupKinds.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + class MipsELFObjectWriter : public MCELFObjectTargetWriter { + public: + MipsELFObjectWriter(uint8_t OSABI); + + virtual ~MipsELFObjectWriter(); + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual unsigned getEFlags() const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + }; +} + +MipsELFObjectWriter::MipsELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MIPS, + /*HasRelocationAddend*/ false) {} + +MipsELFObjectWriter::~MipsELFObjectWriter() {} + +// FIXME: get the real EABI Version from the Triple. 
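The getEFlags() just below hard-codes EF_MIPS_NOREORDER | EF_MIPS_ARCH_32R2 into every object file, and the FIXME above asks for the architecture bits to be derived from the triple instead. Purely as an illustration of where that FIXME points (the helper, its parameters, and the availability of each ELF::EF_MIPS_* constant in this tree are assumptions, not code from this patch):

static unsigned computeEFlags(bool IsMips64, bool IsR2) {
  unsigned Flags = ELF::EF_MIPS_NOREORDER;  // keep the current default
  if (IsMips64)
    Flags |= ELF::EF_MIPS_ARCH_64R2;        // hypothetical 64-bit case
  else
    Flags |= IsR2 ? ELF::EF_MIPS_ARCH_32R2 : ELF::EF_MIPS_ARCH_32;
  return Flags;
}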
+unsigned MipsELFObjectWriter::getEFlags() const { + return ELF::EF_MIPS_NOREORDER | ELF::EF_MIPS_ARCH_32R2; +} + +const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0."); + const MCSymbol &Sym = Target.getSymA()->getSymbol().AliasedSymbol(); + + if (Sym.getSection().getKind().isMergeableCString() || + Sym.getSection().getKind().isMergeableConst()) + return &Sym; + + return NULL; +} + +unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + // determine the type of the relocation + unsigned Type = (unsigned)ELF::R_MIPS_NONE; + unsigned Kind = (unsigned)Fixup.getKind(); + + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + Type = ELF::R_MIPS_32; + break; + case FK_GPRel_4: + Type = ELF::R_MIPS_GPREL32; + break; + case Mips::fixup_Mips_GPREL16: + Type = ELF::R_MIPS_GPREL16; + break; + case Mips::fixup_Mips_26: + Type = ELF::R_MIPS_26; + break; + case Mips::fixup_Mips_CALL16: + Type = ELF::R_MIPS_CALL16; + break; + case Mips::fixup_Mips_GOT_Global: + case Mips::fixup_Mips_GOT_Local: + Type = ELF::R_MIPS_GOT16; + break; + case Mips::fixup_Mips_HI16: + Type = ELF::R_MIPS_HI16; + break; + case Mips::fixup_Mips_LO16: + Type = ELF::R_MIPS_LO16; + break; + case Mips::fixup_Mips_TLSGD: + Type = ELF::R_MIPS_TLS_GD; + break; + case Mips::fixup_Mips_GOTTPREL: + Type = ELF::R_MIPS_TLS_GOTTPREL; + break; + case Mips::fixup_Mips_TPREL_HI: + Type = ELF::R_MIPS_TLS_TPREL_HI16; + break; + case Mips::fixup_Mips_TPREL_LO: + Type = ELF::R_MIPS_TLS_TPREL_LO16; + break; + case Mips::fixup_Mips_TLSLDM: + Type = ELF::R_MIPS_TLS_LDM; + break; + case Mips::fixup_Mips_DTPREL_HI: + Type = ELF::R_MIPS_TLS_DTPREL_HI16; + break; + case Mips::fixup_Mips_DTPREL_LO: + Type = ELF::R_MIPS_TLS_DTPREL_LO16; + break; + case Mips::fixup_Mips_Branch_PCRel: + case Mips::fixup_Mips_PC16: + Type = ELF::R_MIPS_PC16; + break; + } + + return Type; +} + +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, + bool IsLittleEndian) { + MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index a56c002..9b76eda 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -1,4 +1,4 @@ -//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries -----*- C++ -*-===// +//===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_Mips_MipsFIXUPKINDS_H -#define LLVM_Mips_MipsFIXUPKINDS_H +#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H +#define LLVM_MIPS_MIPSFIXUPKINDS_H #include "llvm/MC/MCFixup.h" @@ -83,6 +83,15 @@ namespace Mips { // resulting in - R_MIPS_TLS_TPREL_LO16. fixup_Mips_TPREL_LO, + // resulting in - R_MIPS_TLS_LDM. + fixup_Mips_TLSLDM, + + // resulting in - R_MIPS_TLS_DTPREL_HI16. + fixup_Mips_DTPREL_HI, + + // resulting in - R_MIPS_TLS_DTPREL_LO16. 
+ fixup_Mips_DTPREL_LO, + // PC relative branch fixup resulting in - R_MIPS_PC16 fixup_Mips_Branch_PCRel, @@ -94,4 +103,4 @@ namespace Mips { } // namespace llvm -#endif // LLVM_Mips_MipsFIXUPKINDS_H +#endif // LLVM_MIPS_MIPSFIXUPKINDS_H diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index 71ae804..9d67aa1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===// +//===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,8 @@ using namespace llvm; +void MipsMCAsmInfo::anchor() { } + MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); if ((TheTriple.getArch() == Triple::mips) || @@ -25,11 +27,12 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { AlignmentIsInBytes = false; Data16bitsDirective = "\t.2byte\t"; Data32bitsDirective = "\t.4byte\t"; - Data64bitsDirective = 0; + Data64bitsDirective = "\t.8byte\t"; PrivateGlobalPrefix = "$"; CommentString = "#"; ZeroDirective = "\t.space\t"; GPRel32Directive = "\t.gpword\t"; + GPRel64Directive = "\t.gpdword\t"; WeakRefDirective = "\t.weak\t"; SupportsDebugInformation = true; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 41b7192..ef4c6e2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====// +//===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,7 @@ namespace llvm { class Target; class MipsMCAsmInfo : public MCAsmInfo { + virtual void anchor(); public: explicit MipsMCAsmInfo(const Target &T, StringRef TT); }; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 463dcfe..b039678 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===// +//===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===// // // The LLVM Compiler Infrastructure // @@ -34,10 +34,12 @@ class MipsMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; const MCSubtargetInfo &STI; MCContext &Ctx; + bool IsLittleEndian; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) : MCII(mcii), STI(sti) , Ctx(ctx) {} + MCContext &ctx, bool IsLittle) : + MCII(mcii), STI(sti) , Ctx(ctx), IsLittleEndian(IsLittle) {} ~MipsMCCodeEmitter() {} @@ -58,7 +60,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. 
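The three fixup kinds added above are the middle link in a chain this patch completes for TLS local-dynamic support, running from the operand's symbol variant (VK_Mips_TLSLDM, printed as %tlsldm(...)), through the fixup the code emitter records (fixup_Mips_TLSLDM), to the relocation the ELF writer emits (R_MIPS_TLS_LDM); the DTPREL_HI/LO pair travels the same route. Collapsed into a single hypothetical helper for orientation (in the real code the two halves live in MipsMCCodeEmitter::getMachineOpValue and MipsELFObjectWriter::GetRelocType):

static unsigned relocForTLSVariant(MCSymbolRefExpr::VariantKind VK) {
  switch (VK) {
  case MCSymbolRefExpr::VK_Mips_TLSLDM:    return ELF::R_MIPS_TLS_LDM;
  case MCSymbolRefExpr::VK_Mips_DTPREL_HI: return ELF::R_MIPS_TLS_DTPREL_HI16;
  case MCSymbolRefExpr::VK_Mips_DTPREL_LO: return ELF::R_MIPS_TLS_DTPREL_LO16;
  default:                                 return ELF::R_MIPS_NONE;
  }
}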
- unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; // getBranchJumpOpValue - Return binary encoding of the jump @@ -88,11 +90,18 @@ public: }; // class MipsMCCodeEmitter } // namespace -MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx) +MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) { - return new MipsMCCodeEmitter(MCII, STI, Ctx); + return new MipsMCCodeEmitter(MCII, STI, Ctx, false); +} + +MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx) +{ + return new MipsMCCodeEmitter(MCII, STI, Ctx, true); } /// EncodeInstruction - Emit the instruction. @@ -187,6 +196,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (Kind == MCExpr::SymbolRef) { Mips::Fixups FixupKind; + switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { case MCSymbolRefExpr::VK_Mips_GPREL: FixupKind = Mips::fixup_Mips_GPREL16; @@ -209,6 +219,15 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TLSGD: FixupKind = Mips::fixup_Mips_TLSGD; break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: + FixupKind = Mips::fixup_Mips_TLSLDM; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI: + FixupKind = Mips::fixup_Mips_DTPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO: + FixupKind = Mips::fixup_Mips_DTPREL_LO; + break; case MCSymbolRefExpr::VK_Mips_GOTTPREL: FixupKind = Mips::fixup_Mips_GOTTPREL; break; @@ -227,8 +246,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, return Ret; } llvm_unreachable("Unable to encode MCOperand!"); - // Not reached - return 0; } /// getMemEncoding - Return binary encoding of memory related operand. diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 1fec88a..7652675 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===// +//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC @@ -75,8 +76,9 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMipsMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new MipsInstPrinter(MAI); + return new MipsInstPrinter(MAI, MRI); } static MCStreamer *createMCStreamer(const Target &T, StringRef TT, @@ -111,7 +113,8 @@ extern "C" void LLVMInitializeMipsTargetMC() { TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo); TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo); TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, + createMipsMCInstrInfo); // Register the MC register info. 
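With the code emitter and asm backend split into ...EB/...EL factory functions, endianness is now bound once, at registry time: mips and mips64 get the big-endian factories, mipsel and mips64el the little-endian ones (the hunk below also drops a duplicated pair of RegisterMCCodeEmitter calls that the old code carried). The idea in miniature:

struct Emitter {
  explicit Emitter(bool Little) : IsLittle(Little) {}
  bool IsLittle;  // the only difference between the two factories
};

static Emitter *createEmitterEB() { return new Emitter(false); }
static Emitter *createEmitterEL() { return new Emitter(true); }

// Registration then pairs each target name with the matching factory:
//   TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,   createEmitterEB);
//   TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, createEmitterEL);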
TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo); @@ -121,28 +124,31 @@ extern "C" void LLVMInitializeMipsTargetMC() { createMipsMCRegisterInfo); // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, + createMipsMCCodeEmitterEB); TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, - createMipsMCCodeEmitter); + createMipsMCCodeEmitterEL); TargetRegistry::RegisterMCCodeEmitter(TheMips64Target, - createMipsMCCodeEmitter); + createMipsMCCodeEmitterEB); TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget, - createMipsMCCodeEmitter); + createMipsMCCodeEmitterEL); // Register the object streamer. TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer); - TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, + createMCStreamer); // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend); - - TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); - TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, + createMipsAsmBackendEB); + TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, + createMipsAsmBackendEL); + TargetRegistry::RegisterMCAsmBackend(TheMips64Target, + createMipsAsmBackendEB); + TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, + createMipsAsmBackendEL); // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index fc43d2d..2e58f9d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -14,6 +14,8 @@ #ifndef MIPSMCTARGETDESC_H #define MIPSMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class MCAsmBackend; class MCCodeEmitter; @@ -30,12 +32,19 @@ extern Target TheMipselTarget; extern Target TheMips64Target; extern Target TheMips64elTarget; -MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII, - const MCSubtargetInfo &STI, - MCContext &Ctx); +MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); +MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, + const MCSubtargetInfo &STI, + MCContext &Ctx); -MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEB(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEL(const Target &T, StringRef TT); +MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, + uint8_t OSABI, + bool IsLittleEndian); } // End llvm namespace // Defines symbolic names for Mips registers. 
This defines a mapping from register name to register number.
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 94f7c18..168635c 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc -DIRS = InstPrinter TargetInfo MCTargetDesc +DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index e9e0f60..cbebe84 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -1,4 +1,4 @@ -//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===// +//===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -63,7 +63,7 @@ def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", [FeatureCondMov, FeatureBitCount]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", - [FeatureMips32, FeatureSEInReg]>; + [FeatureMips32, FeatureSEInReg, FeatureSwap]>; def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion", "Mips64", "Mips64 ISA Support", [FeatureGP64Bit, FeatureFP64Bit, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 2996986..427e8d9 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -28,26 +28,8 @@ def Subtract32 : SDNodeXForm<imm, [{ return getImm(N, (unsigned)N->getZExtValue() - 32); }]>; -// shamt field must fit in 5 bits. -def immZExt5_64 : ImmLeaf<i64, [{return Imm == (Imm & 0x1f);}]>; - -// imm32_63 predicate - True if imm is in range [32, 63]. -def imm32_63 : ImmLeaf<i32, - [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}], - Subtract32>; - -// Is a 32-bit int. -def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>; - -// Transformation Function - get the higher 16 bits. -def HIGHER : SDNodeXForm<imm, [{ - return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF); -}]>; - -// Transformation Function - get the highest 16 bits. -def HIGHEST : SDNodeXForm<imm, [{ - return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF); -}]>; +// shamt must fit in 6 bits. +def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; //===----------------------------------------------------------------------===// // Instructions specific format @@ -56,38 +38,9 @@ def HIGHEST : SDNodeXForm<imm, [{ // 64-bit shift instructions. class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm, SDNode OpNode>: - shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, - CPU64Regs>; - -class shift_rotate_imm64_32<bits<6> func, bits<5> isRotate, string instr_asm, - SDNode OpNode>: - shift_rotate_imm<func, isRotate, instr_asm, OpNode, imm32_63, shamt, + shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt, CPU64Regs>; -// Jump and Link (Call) let isCall=1, hasDelaySlot=1, // All calls clobber the non-callee saved registers...
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, - K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { - class JumpLink64<bits<6> op, string instr_asm>: - FJ<op, (outs), (ins calltarget64:$target, variable_ops), - !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], - IIBranch>; - - class JumpLinkReg64<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPU64Regs:$rs, variable_ops), - !strconcat(instr_asm, "\t$rs"), - [(MipsJmpLink CPU64Regs:$rs)], IIBranch> { - let rt = 0; - let rd = 31; - let shamt = 0; - } - - class BranchLink64<string instr_asm>: - FI<0x1, (outs), (ins CPU64Regs:$rs, brtarget:$imm16, variable_ops), - !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>; -} - // Mul, Div class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>: Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>; @@ -144,9 +97,6 @@ def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>; def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>; def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>; -def DSLL32 : shift_rotate_imm64_32<0x3c, 0x00, "dsll32", shl>; -def DSRL32 : shift_rotate_imm64_32<0x3e, 0x00, "dsrl32", srl>; -def DSRA32 : shift_rotate_imm64_32<0x3f, 0x00, "dsra32", sra>; def DSLLV : shift_rotate_reg<0x24, 0x00, "dsllv", shl, CPU64Regs>; def DSRLV : shift_rotate_reg<0x26, 0x00, "dsrlv", srl, CPU64Regs>; def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>; @@ -154,12 +104,11 @@ def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>; // Rotate Instructions let Predicates = [HasMips64r2] in { def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>; - def DROTR32 : shift_rotate_imm64_32<0x3e, 0x01, "drotr32", rotr>; def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>; } /// Load and Store Instructions -/// aligned +/// aligned defm LB64 : LoadM64<0x20, "lb", sextloadi8>; defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>; defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>; @@ -189,8 +138,6 @@ def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>; -def JAL64 : JumpLink64<0x03, "jal">; -def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; @@ -198,6 +145,8 @@ def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>; def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; +def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>; + /// Multiply and Divide Instructions. def DMULT : Mult64<0x1c, "dmult", IIImul>; def DMULTu : Mult64<0x1d, "dmultu", IIImul>; @@ -209,11 +158,19 @@ def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>; def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>; def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>; +/// Sign Ext In Register Instructions. 
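The patterns a little further below lean on a MIPS64 invariant worth spelling out: 32-bit operations keep their results sign-extended across the full 64-bit register, and sll rd, rt, 0 is the architectural way to re-establish that canonical form. That is why sext and sext_inreg lower to an SLL-by-zero, while zext needs the dsll-by-32/dsrl-by-32 pair now that the removed dsll32/dsrl32 forms are expressed through the plain shifts with a 6-bit shift amount (immZExt6). A host-side model of the same bit manipulation, with C++ standing in for the target semantics:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Src = 0x80000001u;  // negative when viewed as i32

  // sll $rd, $rt, 0 : MIPS64 sign-extends 32-bit results into 64 bits.
  int64_t Sext = (int64_t)(int32_t)Src;
  assert((uint64_t)Sext == 0xFFFFFFFF80000001ULL);

  // dsll $rd, $rt, 32 ; dsrl $rd, $rd, 32 : clears the high word (zext).
  uint64_t Zext = ((uint64_t)Src << 32) >> 32;
  assert(Zext == 0x0000000080000001ULL);
  return 0;
}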
+def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>; +def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>; + /// Count Leading def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>; def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; -def LEA_ADDiu64 : EffectiveAddress<"addiu\t$rt, $addr", CPU64Regs, mem_ea_64>; +/// Double Word Swap Bytes/HalfWords +def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; +def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; + +def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; let Uses = [SP_64] in def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, @@ -225,40 +182,35 @@ def DEXT : ExtBase<3, "dext", CPU64Regs>; def DINS : InsBase<7, "dins", CPU64Regs>; def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "dsll32\t$rd, $rt, 0", [], IIAlu>; + "dsll\t$rd, $rt, 32", [], IIAlu>; def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), "sll\t$rd, $rt, 0", [], IIAlu>; +def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// -// Small immediates -def : Pat<(i64 immSExt16:$in), - (DADDiu ZERO_64, imm:$in)>; -def : Pat<(i64 immZExt16:$in), - (ORi64 ZERO_64, imm:$in)>; - -// 32-bit immediates -def : Pat<(i64 immSExt32:$imm), - (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; - -// Arbitrary immediates -def : Pat<(i64 imm:$imm), - (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)), - (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16), - (LO16 imm:$imm))>; - // extended loads let Predicates = [NotN64] in { - def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64 addr:$a), 0), 0)>; - def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>; + def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; + def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; + def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>; + def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>; + def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>; + def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>; + def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>; } let Predicates = [IsN64] in { - def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64_P8 addr:$a), 0), 0)>; - def : Pat<(zextloadi32_u addr:$a), - (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>; + def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>; + def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>; + def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>; + def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>; + def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>; + def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>; + def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>; } // hi/lo relocs @@ -285,12 +237,12 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)), (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>; -def : WrapperPat<tglobaladdr, DADDiu, GP_64>; -def : WrapperPat<tconstpool, DADDiu, GP_64>; -def : WrapperPat<texternalsym, DADDiu, GP_64>; -def : WrapperPat<tblockaddress, DADDiu, GP_64>; -def : WrapperPat<tjumptable, DADDiu, GP_64>; -def : 
WrapperPat<tglobaltlsaddr, DADDiu, GP_64>;
+def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
+def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
+def : WrapperPat<texternalsym, DADDiu, CPU64Regs>;
+def : WrapperPat<tblockaddress, DADDiu, CPU64Regs>;
+def : WrapperPat<tjumptable, DADDiu, CPU64Regs>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, CPU64Regs>;
 defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, ZERO_64>;
@@ -308,7 +260,14 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
 // truncate
 def : Pat<(i32 (trunc CPU64Regs:$src)), (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
-
+
 // 32-to-64-bit extension
 def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
-def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+
+// Sign extend in register
+def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+
+// bswap pattern
+def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp new file mode 100644 index 0000000..31b669a --- /dev/null +++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -0,0 +1,153 @@
+//===-- MipsAnalyzeImmediate.cpp - Analyze Immediates ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
+#include "Mips.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+MipsAnalyzeImmediate::Inst::Inst(unsigned O, unsigned I) : Opc(O), ImmOpnd(I) {}
+
+// Add I to the instruction sequences.
+void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
+  // Add an instruction sequence consisting of just I.
+  if (SeqLs.empty()) {
+    SeqLs.push_back(InstSeq(1, I));
+    return;
+  }
+
+  for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter)
+    Iter->push_back(I);
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize,
+                                             InstSeqLs &SeqLs) {
+  GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs);
+  AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize,
+                                           InstSeqLs &SeqLs) {
+  GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs);
+  AddInstr(SeqLs, Inst(ORi, Imm & 0xffff));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize,
+                                           InstSeqLs &SeqLs) {
+  unsigned Shamt = CountTrailingZeros_64(Imm);
+  GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
+  AddInstr(SeqLs, Inst(SLL, Shamt));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize,
+                                        InstSeqLs &SeqLs) {
+  int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size));
+
+  // Do nothing if Imm is 0.
+  if (!MaskedImm)
+    return;
+
+  // A single ADDiu will do if RemSize <= 16.
+  if (RemSize <= 16) {
+    AddInstr(SeqLs, Inst(ADDiu, MaskedImm));
+    return;
+  }
+
+  // Shift if the lower 16 bits are cleared.
+  if (!(Imm & 0xffff)) {
+    GetInstSeqLsSLL(Imm, RemSize, SeqLs);
+    return;
+  }
+
+  GetInstSeqLsADDiu(Imm, RemSize, SeqLs);
+
+  // If bit 15 is cleared, it doesn't make a difference whether the last
+  // instruction is an ADDiu or ORi.
In that case, do not call GetInstSeqLsORi.
+  if (Imm & 0x8000) {
+    InstSeqLs SeqLsORi;
+    GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
+    SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+  }
+}
+
+// Replace an ADDiu & SLL pair with a LUi.
+// e.g. the following two instructions
+//  ADDiu 0x0111
+//  SLL 18
+// are replaced with
+//  LUi 0x444
+void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) {
+  // Check if the first two instructions are ADDiu and SLL and the shift amount
+  // is at least 16.
+  if ((Seq.size() < 2) || (Seq[0].Opc != ADDiu) ||
+      (Seq[1].Opc != SLL) || (Seq[1].ImmOpnd < 16))
+    return;
+
+  // Sign-extend and shift operand of ADDiu and see if it still fits in 16 bits.
+  int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd);
+  int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16);
+
+  if (!isInt<16>(ShiftedImm))
+    return;
+
+  // Replace the first instruction and erase the second.
+  Seq[0].Opc = LUi;
+  Seq[0].ImmOpnd = (unsigned)(ShiftedImm & 0xffff);
+  Seq.erase(Seq.begin() + 1);
+}
+
+void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
+  InstSeqLs::iterator ShortestSeq = SeqLs.end();
+  // The length of an instruction sequence is at most 7.
+  unsigned ShortestLength = 8;
+
+  for (InstSeqLs::iterator S = SeqLs.begin(); S != SeqLs.end(); ++S) {
+    ReplaceADDiuSLLWithLUi(*S);
+    assert(S->size() <= 7);
+
+    if (S->size() < ShortestLength) {
+      ShortestSeq = S;
+      ShortestLength = S->size();
+    }
+  }
+
+  Insts.clear();
+  Insts.append(ShortestSeq->begin(), ShortestSeq->end());
+}
+
+const MipsAnalyzeImmediate::InstSeq
+&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size,
+                               bool LastInstrIsADDiu) {
+  this->Size = Size;
+
+  if (Size == 32) {
+    ADDiu = Mips::ADDiu;
+    ORi = Mips::ORi;
+    SLL = Mips::SLL;
+    LUi = Mips::LUi;
+  } else {
+    ADDiu = Mips::DADDiu;
+    ORi = Mips::ORi64;
+    SLL = Mips::DSLL;
+    LUi = Mips::LUi64;
+  }
+
+  InstSeqLs SeqLs;
+
+  // Get the list of instruction sequences.
+  if (LastInstrIsADDiu || !Imm)
+    GetInstSeqLsADDiu(Imm, Size, SeqLs);
+  else
+    GetInstSeqLs(Imm, Size, SeqLs);
+
+  // Set Insts to the shortest instruction sequence.
+  GetShortestSeq(SeqLs, Insts);
+
+  return Insts;
+}
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h new file mode 100644 index 0000000..24e6e5f --- /dev/null +++ b/lib/Target/Mips/MipsAnalyzeImmediate.h @@ -0,0 +1,63 @@
+//===-- MipsAnalyzeImmediate.h - Analyze Immediates ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPS_ANALYZE_IMMEDIATE_H
+#define MIPS_ANALYZE_IMMEDIATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+  class MipsAnalyzeImmediate {
+  public:
+    struct Inst {
+      unsigned Opc, ImmOpnd;
+      Inst(unsigned Opc, unsigned ImmOpnd);
+    };
+    typedef SmallVector<Inst, 7> InstSeq;
+
+    /// Analyze - Get an instruction sequence to load immediate Imm. The last
+    /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
+    /// true.
+    const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+  private:
+    typedef SmallVector<InstSeq, 5> InstSeqLs;
+
+    /// AddInstr - Add I to all instruction sequences in SeqLs.
+    void AddInstr(InstSeqLs &SeqLs, const Inst &I);
+
+    /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
+    /// load immediate Imm.
+    void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+    /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
+    /// load immediate Imm.
+    void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+    /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to
+    /// load immediate Imm.
+    void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+    /// GetInstSeqLs - Get instruction sequences to load immediate Imm.
+    void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+    /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
+    void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
+
+    /// GetShortestSeq - Find the shortest instruction sequence in SeqLs and
+    /// return it in Insts.
+    void GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts);
+
+    unsigned Size;
+    unsigned ADDiu, ORi, SLL, LUi;
+    InstSeq Insts;
+  };
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a5505d3..aeabc0f 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1,4 +1,4 @@
-//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//===-- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer -------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -34,6 +34,8 @@
 #include "llvm/Instructions.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -49,7 +51,14 @@ static bool isUnalignedLoadStore(unsigned Opc) {
 return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu || Opc == Mips::USW || Opc == Mips::USH || Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
- Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+ Opc == Mips::USW_P8 || Opc == Mips::USH_P8 ||
+ Opc == Mips::ULD || Opc == Mips::ULW64 || Opc == Mips::ULH64 ||
+ Opc == Mips::ULHu64 || Opc == Mips::USD || Opc == Mips::USW64 ||
+ Opc == Mips::USH64 ||
+ Opc == Mips::ULD_P8 || Opc == Mips::ULW64_P8 ||
+ Opc == Mips::ULH64_P8 || Opc == Mips::ULHu64_P8 ||
+ Opc == Mips::USD_P8 || Opc == Mips::USW64_P8 ||
+ Opc == Mips::USH64_P8;
 }
 static bool isDirective(unsigned Opc) { @@ -59,10 +68,10 @@ static bool isDirective(unsigned Opc) { }
 void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
-
 if (MI->isDebugValue()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
 PrintDebugValueComment(MI, OS);
 return;
 }
@@ -100,7 +109,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 MCInstLowering.LowerCPLOAD(MI, MCInsts);
 else if (Opc == Mips::CPRESTORE)
 MCInstLowering.LowerCPRESTORE(MI, MCInsts);
-
+
 if (!MCInsts.empty()) {
 for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I != MCInsts.end(); ++I)
@@ -109,6 +118,16 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 }
 }
+ if (Opc == Mips::SETGP01) {
+ MCInstLowering.LowerSETGP01(MI, MCInsts);
+
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+
+ return;
+ }
+
 OutStreamer.EmitInstruction(TmpInst0);
 }
@@ -171,7 +190,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { if 
(Mips::CPURegsRegisterClass->contains(Reg))
 break;
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
 if (Mips::AFGR64RegisterClass->contains(Reg)) {
 FPUBitmask |= (3 << RegNum);
 CSFPRegsSize += AFGR64RegSize;
@@ -186,7 +205,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
 // Set CPU Bitmask.
 for (; i != e; ++i) {
 unsigned Reg = CSI[i].getReg();
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
 CPUBitmask |= (1 << RegNum);
 }
@@ -225,7 +244,7 @@ void MipsAsmPrinter::emitFrameDirective() {
 unsigned returnReg = RI.getRARegister();
 unsigned stackSize = MF->getFrameInfo()->getStackSize();
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport())
 OutStreamer.EmitRawText("\t.frame\t$" + StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() + "," + Twine(stackSize) + ",$" +
@@ -239,15 +258,12 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
 case MipsSubtarget::N32: return "abiN32";
 case MipsSubtarget::N64: return "abi64";
 case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
- default: break;
+ default: llvm_unreachable("Unknown Mips ABI");
 }
-
- llvm_unreachable("Unknown Mips ABI");
- return NULL;
 }
 void MipsAsmPrinter::EmitFunctionEntryLabel() {
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport())
 OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
 OutStreamer.EmitLabel(CurrentFnSym);
 }
@@ -300,18 +316,18 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
 // If there isn't exactly one predecessor, it can't be a fall through.
 MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
 ++PI2;
-
+
 if (PI2 != MBB->pred_end())
- return false;
+ return false;
 // The predecessor has to be immediately before this block.
 if (!Pred->isLayoutSuccessor(MBB))
 return false;
-
+
 // If the block is completely empty, then it definitely does fall through.
 if (Pred->empty())
 return true;
-
+
 // Otherwise, check the last instruction.
 // Check if the last terminator is an unconditional branch.
 MachineBasicBlock::const_iterator I = Pred->end();
@@ -338,7 +354,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 raw_ostream &O) {
 if (ExtraCode && ExtraCode[0])
 return true; // Unknown modifier.
-
+
 const MachineOperand &MO = MI->getOperand(OpNum);
 assert(MO.isReg() && "unexpected inline asm memory operand");
 O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
@@ -459,7 +475,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
 // Tell the assembler which ABI we are using
 if (OutStreamer.hasRawTextSupport())
- OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+ OutStreamer.EmitRawText("\t.section .mdebug." 
+ + Twine(getCurrentABIString())); // TODO: handle O64 ABI if (OutStreamer.hasRawTextSupport()) { diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index f0c6626..8502db2 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===// +//===-- MipsAsmPrinter.h - Mips LLVM Assembly Printer ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 3d973ce..4b7e1d3 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -1,4 +1,4 @@ -//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===// +//===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -37,11 +37,16 @@ def RetCC_MipsO32 : CallingConv<[ def CC_MipsN : CallingConv<[ // Handles byval parameters. CCIfByVal<CCCustom<"CC_Mips64Byval">>, - - // Promote i8/i16/i32 arguments to i64. - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, // Integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3, + T0, T1, T2, T3], + [F12, F13, F14, F15, + F16, F17, F18, F19]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64, T2_64, T3_64], [D12_64, D13_64, D14_64, D15_64, @@ -60,8 +65,8 @@ def CC_MipsN : CallingConv<[ T0_64, T1_64, T2_64, T3_64]>>, // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. - CCIfType<[i64, f64], CCAssignToStack<8, 8>>, - CCIfType<[f32], CCAssignToStack<4, 8>> + CCIfType<[i32, f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; // N32/64 variable arguments. @@ -69,23 +74,21 @@ def CC_MipsN : CallingConv<[ def CC_MipsN_VarArg : CallingConv<[ // Handles byval parameters. CCIfByVal<CCCustom<"CC_Mips64Byval">>, - - // Promote i8/i16/i32 arguments to i64. - CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64, T2_64, T3_64]>>, - CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, - // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. - CCIfType<[i64, f64], CCAssignToStack<8, 8>>, - CCIfType<[f32], CCAssignToStack<4, 8>> + CCIfType<[i32, f32], CCAssignToStack<4, 8>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; def RetCC_MipsN : CallingConv<[ - // FIXME: Handle complex and float double return values. - // i32 are returned in registers V0, V1 CCIfType<[i32], CCAssignToReg<[V0, V1]>>, @@ -157,3 +160,20 @@ def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, CCDelegateTo<RetCC_MipsO32> ]>; + +//===----------------------------------------------------------------------===// +// Callee-saved register lists. 
+//===----------------------------------------------------------------------===// + +def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP, + (sequence "S%u", 7, 0))>; + +def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, + (sequence "S%u", 7, 0))>; + +def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64, + D23_64, D22_64, D21_64, RA_64, FP_64, GP_64, + (sequence "S%u_64", 7, 0))>; + +def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64, + GP_64, (sequence "S%u_64", 7, 0))>; diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 6b26e24..7d81902 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===// +//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===// // // The LLVM Compiler Infrastructure // @@ -80,7 +80,7 @@ class MipsCodeEmitter : public MachineFunctionPass { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; void emitInstruction(const MachineInstr &MI); @@ -107,7 +107,8 @@ class MipsCodeEmitter : public MachineFunctionPass { unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; - unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -119,7 +120,7 @@ class MipsCodeEmitter : public MachineFunctionPass { int emitUSH(const MachineInstr &MI); void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, - int Offset) const; + int Offset) const; }; } @@ -162,7 +163,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, return Mips::reloc_mips_26; if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) && MI.isBranch()) - return Mips::reloc_mips_branch; + return Mips::reloc_mips_pc16; if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) return Mips::reloc_mips_hi; return Mips::reloc_mips_lo; @@ -170,13 +171,22 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const { - // FIXME: implement + MachineOperand MO = MI.getOperand(OpNo); + if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unexpected jump target operand kind."); return 0; } unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const { - // FIXME: implement + MachineOperand MO = MI.getOperand(OpNo); + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); return 0; } @@ -206,7 +216,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) - 
return MipsRegisterInfo::getRegisterNumbering(MO.getReg()); + return getMipsRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isGlobal()) { diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 1a3f194..075a3e8 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -1,3 +1,16 @@ +//===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the Conditional Moves implementation. +// +//===----------------------------------------------------------------------===// + // Conditional moves: // These instructions are expanded in // MipsISelLowering::EmitInstrWithCustomInserter if target does not have diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 1d9e9b0..debf2f1 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -1,4 +1,4 @@ -//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===// +//===-- DelaySlotFiller.cpp - Mips Delay Slot Filler ----------------------===// // // The LLVM Compiler Infrastructure // @@ -105,8 +105,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); ++UsefulSlots; - } - else + } else BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); // Record the filler instruction that filled the delay slot. @@ -167,15 +166,14 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, } bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, - bool &sawLoad, - bool &sawStore, + bool &sawLoad, bool &sawStore, SmallSet<unsigned, 32> &RegDefs, SmallSet<unsigned, 32> &RegUses) { if (candidate->isImplicitDef() || candidate->isKill()) return true; // Loads or stores cannot be moved past a store to the delay slot - // and stores cannot be moved past a load. + // and stores cannot be moved past a load. if (candidate->mayLoad()) { if (sawStore) return true; @@ -222,8 +220,8 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, MCInstrDesc MCID = MI->getDesc(); unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() : MI->getNumOperands(); - - // Add RA to RegDefs to prevent users of RA from going into delay slot. + + // Add RA to RegDefs to prevent users of RA from going into delay slot. 
if (MI->isCall()) RegDefs.insert(Mips::RA); @@ -246,7 +244,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) { if (RegSet.count(Reg)) return true; // check Aliased Registers - for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg); *Alias; ++Alias) if (RegSet.count(*Alias)) return true; diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp index 03d922f..119d1a8 100644 --- a/lib/Target/Mips/MipsEmitGPRestore.cpp +++ b/lib/Target/Mips/MipsEmitGPRestore.cpp @@ -1,4 +1,4 @@ -//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction----------------===// +//===-- MipsEmitGPRestore.cpp - Emit GP Restore Instruction ---------------===// // // The LLVM Compiler Infrastructure // @@ -44,11 +44,14 @@ namespace { } // end of anonymous namespace bool Inserter::runOnMachineFunction(MachineFunction &F) { - if (TM.getRelocationModel() != Reloc::PIC_) + MipsFunctionInfo *MipsFI = F.getInfo<MipsFunctionInfo>(); + + if ((TM.getRelocationModel() != Reloc::PIC_) || + (!MipsFI->globalBaseRegFixed())) return false; bool Changed = false; - int FI = F.getInfo<MipsFunctionInfo>()->getGPFI(); + int FI = MipsFI->getGPFI(); for (MachineFunction::iterator MFI = F.begin(), MFE = F.end(); MFI != MFE; ++MFI) { @@ -60,7 +63,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) { if (MBB.isLandingPad()) { // Find EH_LABEL first. for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ; - + // Insert lw. ++I; DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); @@ -81,7 +84,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) { .addImm(0); Changed = true; } - } + } return Changed; } diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp index a622258..baeae97 100644 --- a/lib/Target/Mips/MipsExpandPseudo.cpp +++ b/lib/Target/Mips/MipsExpandPseudo.cpp @@ -1,4 +1,4 @@ -//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===// +//===-- MipsExpandPseudo.cpp - Expand Pseudo Instructions ----------------===// // // The LLVM Compiler Infrastructure // @@ -64,16 +64,22 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { const MCInstrDesc& MCid = I->getDesc(); switch(MCid.getOpcode()) { - default: + default: ++I; continue; + case Mips::SETGP2: + // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9" + BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu), + I->getOperand(0).getReg()) + .addReg(Mips::V0).addReg(I->getOperand(1).getReg()); + break; case Mips::BuildPairF64: ExpandBuildPairF64(MBB, I); break; case Mips::ExtractElementF64: ExpandExtractElementF64(MBB, I); break; - } + } // delete original instr MBB.erase(I++); @@ -84,12 +90,12 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { } void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, - MachineBasicBlock::iterator I) { + MachineBasicBlock::iterator I) { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); - const unsigned* SubReg = + const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(DstReg); // mtc1 Lo, $fp @@ -105,12 +111,12 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, unsigned N = I->getOperand(2).getImm(); const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); DebugLoc dl = 
I->getDebugLoc(); - const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); + const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N)); } -/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo +/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo /// instrs into real instrs FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) { return new MipsExpandPseudo(tm); diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 2466545..e83c64e 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====// +//===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" @@ -93,47 +94,40 @@ bool MipsFrameLowering::targetHandlesStackFrameRounding() const { return true; } -static unsigned AlignOffset(unsigned Offset, unsigned Align) { - return (Offset + Align - 1) / Align * Align; -} - -// expand pair of register and immediate if the immediate doesn't fit in the -// 16-bit offset field. -// e.g. -// if OrigImm = 0x10000, OrigReg = $sp: -// generate the following sequence of instrs: -// lui $at, hi(0x10000) -// addu $at, $sp, $at -// -// (NewReg, NewImm) = ($at, lo(Ox10000)) -// return true -static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm, - unsigned& NewReg, int& NewImm, - MachineBasicBlock& MBB, - MachineBasicBlock::iterator I) { - // OrigImm fits in the 16-bit field - if (OrigImm < 0x8000 && OrigImm >= -0x8000) { - NewReg = OrigReg; - NewImm = OrigImm; - return false; - } - - MachineFunction* MF = MBB.getParent(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - DebugLoc DL = I->getDebugLoc(); - int ImmLo = (short)(OrigImm & 0xffff); - int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + - ((OrigImm & 0x8000) != 0); +// Build an instruction sequence to load an immediate that is too large to fit +// in 16-bit and add the result to Reg. +static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, + const MipsInstrInfo &TII, MachineBasicBlock& MBB, + MachineBasicBlock::iterator II, DebugLoc DL) { + unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; + unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu; + unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate AnalyzeImm; + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); // FIXME: change this when mips goes MC". - BuildMI(MBB, I, DL, TII->get(Mips::NOAT)); - BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); - BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) - .addReg(Mips::AT); - NewReg = Mips::AT; - NewImm = ImmLo; + BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. 
+  if (Inst->Opc == LUi)
+    BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+      .addImm(SignExtend64<16>(Inst->ImmOpnd));
+  else
+    BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+      .addImm(SignExtend64<16>(Inst->ImmOpnd));
- return true;
+  // Build the remaining instructions in Seq.
+  for (++Inst; Inst != Seq.end(); ++Inst)
+    BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+      .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+  BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
+  BuildMI(MBB, II, DL, TII.get(Mips::ATMACRO));
 }
 void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -147,50 +141,36 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
 MachineBasicBlock::iterator MBBI = MBB.begin();
 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- unsigned NewReg = 0;
- int NewImm = 0;
- bool ATUsed;
- unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP;
- unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
 unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
 unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
 unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
 unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
 unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
- unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
 // First, compute final stack size.
 unsigned RegSize = STI.isGP32bit() ? 4 : 8;
 unsigned StackAlign = getStackAlignment();
- unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ? (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : MipsFI->getMaxCallFrameSize();
- unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
- AlignOffset(MFI->getStackSize(), StackAlign);
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
+ MipsFI->getMaxCallFrameSize();
+ uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) +
+ RoundUpToAlignment(MFI->getStackSize(), StackAlign);
 // Update stack size
- MFI->setStackSize(StackSize);
+ MFI->setStackSize(StackSize);
+
 BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
 BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
- // Emit instructions that set $gp using the the value of $t9.
- // O32 uses the directive .cpload while N32/64 requires three instructions to
- // do this.
- // TODO: Do not emit these instructions if no instructions use $gp.
- if (isPIC && STI.isABI_O32())
- BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD))
- .addReg(RegInfo->getPICCallReg());
- else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) {
- // lui $28,%hi(%neg(%gp_rel(fname)))
- // addu $28,$28,$25
- // addiu $28,$28,%lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, MBBI, dl, TII.get(LUi), GP)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), GP).addReg(GP).addReg(T9);
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), GP).addReg(GP)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ // Emit instructions that set the global base register if the target ABI is
+ // O32.
+ if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32()) {
+ if (MipsFI->globalBaseRegFixed())
+ BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD))
+ .addReg(RegInfo->getPICCallReg());
+ else
+ // See MipsInstrInfo.td for explanation.
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::SETGP01), Mips::V0);
 }
 // No need to allocate space on the stack. 
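
Editorial aside, between hunks: expandLargeImm above asks MipsAnalyzeImmediate for a sequence and then chains it through $at. A minimal standalone sketch of the materialization idea in plain C++ (not the LLVM MachineInstr API); the fixed lui/ori/dsll slicing below is the worst-case shape, matching the removed "Arbitrary immediates" pattern in Mips64InstrInfo.td, whereas the analyzer in this patch enumerates ADDiu/ORi/SLL alternatives and keeps the shortest. All names here are illustrative.

#include <cassert>
#include <cstdint>

// Stand-ins for the Mips::LUi64, Mips::ORi64 and Mips::DSLL opcodes.
enum Opc { LUi, ORi, DSLL };
struct Inst { Opc Op; unsigned Imm; };

// Worst case: lui/ori/dsll/ori/dsll/ori, four 16-bit chunks of Imm.
static int analyze(uint64_t Imm, Inst Seq[6]) {
  int N = 0;
  Seq[N++] = Inst{ LUi,  unsigned(Imm >> 48) & 0xffff };
  Seq[N++] = Inst{ ORi,  unsigned(Imm >> 32) & 0xffff };
  Seq[N++] = Inst{ DSLL, 16 };
  Seq[N++] = Inst{ ORi,  unsigned(Imm >> 16) & 0xffff };
  Seq[N++] = Inst{ DSLL, 16 };
  Seq[N++] = Inst{ ORi,  unsigned(Imm) & 0xffff };
  return N;
}

// Interpret the sequence the way the emitted code executes it. Note: a real
// MIPS64 lui sign-extends bit 31; the patch copes by sign-extending each
// 16-bit operand (SignExtend64<16>), a wrinkle this sketch glosses over.
static uint64_t run(const Inst *Seq, int N) {
  uint64_t R = 0;
  for (int I = 0; I < N; ++I) {
    if (Seq[I].Op == LUi)  R = uint64_t(Seq[I].Imm) << 16; // set bits 16..31
    if (Seq[I].Op == ORi)  R |= Seq[I].Imm;                // zero-extended or
    if (Seq[I].Op == DSLL) R <<= Seq[I].Imm;               // 64-bit shift
  }
  return R;
}

int main() {
  Inst Seq[6];
  uint64_t Imm = 0x1234567890abcdefULL;
  int N = analyze(Imm, Seq);
  assert(run(Seq, N) == Imm); // six instructions rebuild the immediate
}

The shortening passes in the patch (ReplaceADDiuSLLWithLUi, GetShortestSeq) exist precisely because this six-instruction shape is rarely necessary; sparse immediates collapse to two or three operations.
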
@@ -200,13 +180,11 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { std::vector<MachineMove> &Moves = MMI.getFrameMoves(); MachineLocation DstML, SrcML; - // Adjust stack : addi sp, sp, (-imm) - ATUsed = expandRegLargeImmPair(SP, -StackSize, NewReg, NewImm, MBB, MBBI); - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); - - // FIXME: change this when mips goes MC". - if (ATUsed) - BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); + // Adjust stack. + if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); + else // Expand immediate that doesn't fit in 16-bit. + expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); @@ -223,13 +201,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // register to the stack. for (unsigned i = 0; i < CSI.size(); ++i) ++MBBI; - + // Iterate over list of callee-saved registers and emit .cfi_offset // directives. MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); @@ -238,7 +216,7 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // If Reg is a double precision register, emit two cfa_offsets, // one for each of the paired single precision registers. if (Mips::AFGR64RegisterClass->contains(Reg)) { - const unsigned *SubRegs = RegInfo->getSubRegisters(Reg); + const uint16_t *SubRegs = RegInfo->getSubRegisters(Reg); MachineLocation DstML0(MachineLocation::VirtualFP, Offset); MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); MachineLocation SrcML0(*SubRegs); @@ -257,14 +235,14 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); } } - } + } // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. + // Insert instruction "move $fp, $sp" at this location. BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); - // emit ".cfi_def_cfa_register $fp" + // emit ".cfi_def_cfa_register $fp" MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); @@ -298,18 +276,11 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - // Get the number of bytes from FrameInfo - unsigned StackSize = MFI->getStackSize(); - - unsigned NewReg = 0; - int NewImm = 0; - bool ATUsed = false; - // if framepointer enabled, restore the stack pointer. if (hasFP(MF)) { // Find the first instruction that restores a callee-saved register. 
MachineBasicBlock::iterator I = MBBI;
-
+
 for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
 --I;
@@ -317,22 +288,23 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
 BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
 }
- // adjust stack : insert addi sp, sp, (imm)
- if (StackSize) {
- ATUsed = expandRegLargeImmPair(SP, StackSize, NewReg, NewImm, MBB, MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm);
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
- // FIXME: change this when mips goes MC".
- if (ATUsed)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
- }
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize)) // addiu sp, sp, (stacksize)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
+ else // Expand immediate that doesn't fit in 16-bit.
+ expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
 }
 void MipsFrameLowering::
 processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 RegScavenger *RS) const {
 MachineRegisterInfo& MRI = MF.getRegInfo();
- unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
 unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
 // FIXME: remove this code if register allocator can correctly mark
@@ -342,13 +314,15 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 if (hasFP(MF))
 MRI.setPhysRegUsed(FP);
- // The register allocator might determine $ra is used after seeing
 // instruction "jr $ra", but we do not want PrologEpilogInserter to insert
 // instructions to save/restore $ra unless there is a function call.
 // To correct this, $ra is explicitly marked unused if there is no
 // function call. 
if (MF.getFrameInfo()->hasCalls()) - MRI.setPhysRegUsed(RA); - else - MRI.setPhysRegUnused(RA); + MRI.setPhysRegUsed(Mips::RA); + else { + MRI.setPhysRegUnused(Mips::RA); + MRI.setPhysRegUnused(Mips::RA_64); + } } diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index c249756..bd1d89f 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -1,4 +1,4 @@ -//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===// +//===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index b17239d..782d203 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -1,4 +1,4 @@ -//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===// +//===-- MipsISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips --------===// // // The LLVM Compiler Infrastructure // @@ -13,10 +13,12 @@ #define DEBUG_TYPE "mips-isel" #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/GlobalValue.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -28,6 +30,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -63,6 +66,7 @@ public: return "MIPS DAG->DAG Pattern Instruction Selection"; } + virtual bool runOnMachineFunction(MachineFunction &MF); private: // Include the pieces autogenerated from the target description. @@ -81,16 +85,22 @@ private: } SDNode *getGlobalBaseReg(); + + std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset); // getImm - Return a target constant with the specified value. inline SDValue getImm(const SDNode *Node, unsigned Imm) { return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } + void InitGlobalBaseReg(MachineFunction &MF); + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); @@ -98,20 +108,114 @@ private: } +// Insert instructions to initialize the global base register in the +// first MBB of the function. When the ABI is O32 and the relocation model is +// PIC, the necessary instructions are emitted later to prevent optimization +// passes from moving them. +void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + bool FixGlobalBaseReg = MipsFI->globalBaseRegFixed(); + + if (Subtarget.isABI_O32() && FixGlobalBaseReg) + // $gp is the global base register. + V0 = V1 = GlobalBaseReg; + else { + const TargetRegisterClass *RC; + RC = Subtarget.isABI_N64() ? + Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + } + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + } else if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + } else { + MF.getRegInfo().addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + } else if (!MipsFI->globalBaseRegFixed()) { + assert(Subtarget.isABI_O32()); + + BuildMI(MBB, I, DL, TII.get(Mips::SETGP2), GlobalBaseReg) + .addReg(Mips::T9); + } + } +} + +bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + bool Ret = SelectionDAGISel::runOnMachineFunction(MF); + + InitGlobalBaseReg(MF); + + return Ret; +} /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { - unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); + unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg(); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: -SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { +SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); - unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64; + + // If Parent is an unaligned f32 load or store, select a (base + index) + // floating point load/store instruction (luxc1 or suxc1). 
+ const LSBaseSDNode* LS = 0; + + if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { + EVT VT = LS->getMemoryVT(); + + if (VT.getSizeInBits() / 8 > LS->getAlignment()) { + assert(TLI.allowsUnalignedMemoryAccesses(VT) && + "Unaligned loads/stores not supported for this type."); + if (VT == MVT::f32) + return false; + } + } // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { @@ -122,8 +226,8 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // on PIC code Load GA if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = CurDAG->getRegister(GPReg, ValTy); - Offset = Addr.getOperand(0); + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); return true; } @@ -160,17 +264,20 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || - Addr.getOperand(0).getOpcode() == ISD::LOAD) && - Addr.getOperand(1).getOpcode() == MipsISD::Lo) { + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { SDValue LoVal = Addr.getOperand(1); - if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || + if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) || isa<GlobalAddressSDNode>(LoVal.getOperand(0))) { Base = Addr.getOperand(0); Offset = LoVal.getOperand(0); return true; } } + + // If an indexed floating point load/store can be emitted, return false. + if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasMips32r2Or64()) + return false; } Base = Addr; @@ -178,6 +285,28 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { return true; } +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + Lo = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64, dl, + Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) + Hi = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64, dl, + Ty, InFlag); + + return std::make_pair(Lo, Hi); +} + + /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { @@ -197,134 +326,167 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. 
/// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + switch(Opcode) { - default: break; - - case ISD::SUBE: - case ISD::ADDE: { - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - MOp = Mips::SUBu; - } + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::ADDu; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SUBu; + } - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); - EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); - SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, - SDValue(Carry,0), RHS); + EVT VT = LHS.getValueType(); + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, + SDValue(Carry,0), RHS); - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, + LHS, SDValue(AddCarry,0)); + } - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - assert(Node->getValueType(0) != MVT::i64 && - "64-bit multiplication with two results not handled."); - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - unsigned Op; - Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, + true, true); - SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - SDValue InFlag = SDValue(Mul, 0); - SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, - MVT::Glue, InFlag); - InFlag = SDValue(Lo,1); - SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(Lo,0)); + return NULL; + } - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(Hi,0)); + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, + dl, NodeTy, true, false).first; + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? 
Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; + } - return NULL; - } + // Get target GOT address. + case ISD::GLOBAL_OFFSET_TABLE: + return getGlobalBaseReg(); - /// Special Muls - case ISD::MUL: - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32) - break; - case ISD::MULHS: - case ISD::MULHU: { - assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) && - "64-bit MULH* not handled."); - EVT Ty = Node->getValueType(0); - SDValue MulOp1 = Node->getOperand(0); - SDValue MulOp2 = Node->getOperand(1); - - unsigned MulOp = (Opcode == ISD::MULHU ? - Mips::MULTu : - (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT)); - SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, - MVT::Glue, MulOp1, MulOp2); - - SDValue InFlag = SDValue(MulNode, 0); - - if (Opcode == ISD::MUL) { - unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - return CurDAG->getMachineNode(Opc, dl, Ty, InFlag); + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + Mips::ZERO_64, MVT::i64); + return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); } - else - return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); + + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + Mips::ZERO, MVT::i32); + return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, + Zero); } + break; + } - // Get target GOT address. - case ISD::GLOBAL_OFFSET_TABLE: - return getGlobalBaseReg(); + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } + if (Size == 32) break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. 
+ for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); } - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); + return RegOpnd; + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return ResNode.getNode(); + } } // Select the default instruction diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c9b657c..dc894d9 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -36,9 +36,9 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; -// If I is a shifted mask, set the size (Size) and the first bit of the +// If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. -// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). +// For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { if (!isShiftedMask_64(I)) return false; @@ -48,6 +48,11 @@ static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } +static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) { + MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); + return DAG.getRegister(FI->getGlobalBaseReg(), Ty); +} + const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { case MipsISD::JmpLink: return "MipsISD::JmpLink"; @@ -92,17 +97,20 @@ MipsTargetLowering(MipsTargetMachine &TM) // Set up the register classes addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); - addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); if (HasMips64) addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass); - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); - else - addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); + else + addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + } } // Load extented operations for i1 types must be promoted @@ -136,6 +144,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::SDIV, MVT::i32, Expand); @@ -152,10 +161,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); @@ -189,7 +202,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); @@ -200,10 +215,12 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, 
Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setInsertFencesForAtomic(true); @@ -215,11 +232,15 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); } - if (!Subtarget->hasBitCount()) + if (!Subtarget->hasBitCount()) { setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTLZ, MVT::i64, Expand); + } - if (!Subtarget->hasSwap()) + if (!Subtarget->hasSwap()) { setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + } setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); @@ -231,16 +252,26 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(2); - setStackPointerRegisterToSaveRestore(Mips::SP); + setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); computeRegisterProperties(); - setExceptionPointerRegister(Mips::A0); - setExceptionSelectorRegister(Mips::A1); + setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); + setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); } bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + case MVT::i16: + return true; + case MVT::f32: + return Subtarget->hasMips32r2Or64(); + default: + return false; + } } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -297,8 +328,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // create MipsMAdd(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - SDValue MAdd = CurDAG->getNode(MultOpc, dl, - MVT::Glue, + SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 ADDCNode->getOperand(1),// Lo0 @@ -371,8 +401,7 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { // create MipsSub(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; - SDValue MSub = CurDAG->getNode(MultOpc, dl, - MVT::Glue, + SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 SUBCNode->getOperand(0),// Lo0 @@ -428,8 +457,8 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, return SDValue(); EVT Ty = N->getValueType(0); - unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; - unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; + unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; + unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : MipsISD::DivRemU; DebugLoc dl = N->getDebugLoc(); @@ -490,11 +519,10 @@ static bool InvertFPCondCode(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; - if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) - return true; + assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && + "Illegal Condition Code"); - assert(false && "Illegal Condition Code"); - return false; + return true; } // Creates and returns an FPCmp node from a setcc node. 
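The PerformANDCombine and PerformORCombine hunks just below pattern-match shifted masks into the MIPS32r2 ext/ins bitfield instructions. A rough scalar model of those two instructions' semantics (hypothetical function names, assuming pos + size <= 32):

#include <cassert>
#include <cstdint>

// ext $rt, $rs, pos, size: extract a size-bit field of $rs starting at pos.
static uint32_t extModel(uint32_t Rs, unsigned Pos, unsigned Size) {
  uint32_t FieldMask = (Size == 32) ? ~0u : ((1u << Size) - 1);
  return (Rs >> Pos) & FieldMask;
}

// ins $rt, $rs, pos, size: insert the low size bits of $rs into $rt at pos;
// mask1 = (2**size - 1) << pos and mask0 = ~mask1, matching the INS comment.
static uint32_t insModel(uint32_t Rt, uint32_t Rs, unsigned Pos, unsigned Size) {
  uint32_t Mask1 = ((Size == 32) ? ~0u : ((1u << Size) - 1)) << Pos;
  return (Rt & ~Mask1) | ((Rs << Pos) & Mask1);
}

int main() {
  assert(extModel(0x003ff800, 11, 11) == 0x7ff);  // (Pos, Size) = (11, 11)
  assert(insModel(0x00000000, 0x7ff, 11, 11) == 0x003ff800);
  return 0;
}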
@@ -568,7 +596,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, ConstantSDNode *CN; if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) return SDValue(); - + uint64_t Pos = CN->getZExtValue(); uint64_t SMPos, SMSize; @@ -584,17 +612,16 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, return SDValue(); return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy, - ShiftRight.getOperand(0), - DAG.getConstant(Pos, MVT::i32), + ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32), DAG.getConstant(SMSize, MVT::i32)); } - + static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget* Subtarget) { // Pattern match INS. // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), - // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 + // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 // => ins $dst, $src, size, pos, $src1 if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2()) return SDValue(); @@ -614,7 +641,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, // See if Op's second operand matches (and (shl $src, pos), mask1). if (And1.getOpcode() != ISD::AND) return SDValue(); - + if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); @@ -633,18 +660,16 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, unsigned Shamt = CN->getZExtValue(); // Return if the shift amount and the first bit position of mask are not the - // same. + // same. EVT ValTy = N->getValueType(0); if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); - - return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, - Shl.getOperand(0), + + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0), DAG.getConstant(SMPos0, MVT::i32), - DAG.getConstant(SMSize0, MVT::i32), - And0.getOperand(0)); + DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0)); } - + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -700,7 +725,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const // MachineFunction as a live in value. It also creates a corresponding // virtual register for it. 
static unsigned -AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC) +AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { assert(RC->contains(PReg) && "Not the correct regclass!"); unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); @@ -713,10 +738,10 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return Mips::BRANCH_T; - if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) - return Mips::BRANCH_F; + assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && + "Invalid CondCode."); - return Mips::BRANCH_INVALID; + return Mips::BRANCH_F; } /* @@ -800,9 +825,7 @@ MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: - assert(false && "Unexpected instr type to insert"); - return NULL; + default: llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: case Mips::ATOMIC_LOAD_ADD_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); @@ -1049,8 +1072,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(loopMBB); @@ -1082,7 +1104,6 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); - // atomic.load.binop // loopMBB: // ll oldval,0(alignedaddr) @@ -1121,7 +1142,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // and newval, incr2, mask BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } - + BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) @@ -1201,8 +1222,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: @@ -1290,8 +1310,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(loop1MBB); @@ -1460,7 +1479,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDVTList VTs = DAG.getVTList(MVT::i32); @@ -1492,10 +1511,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : (HasGotOfst ? 
MipsII::MO_GOT : MipsII::MO_GOT16); SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); - GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA); - SDValue ResNode = DAG.getLoad(ValTy, dl, - DAG.getEntryNode(), GA, MachinePointerInfo(), - false, false, false, 0); + GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), GA); + SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, + MachinePointerInfo(), false, false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. if (!HasGotOfst) @@ -1515,10 +1533,8 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { // %hi/%lo relocation - SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_ABS_HI); - SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_ABS_LO); + SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI); + SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_LO); SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); @@ -1528,10 +1544,10 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); - BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset); + BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, + GetGlobalReg(DAG, ValTy), BAGOTOffset); SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); - SDValue Load = DAG.getLoad(ValTy, dl, - DAG.getEntryNode(), BAGOTOffset, + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, MachinePointerInfo(), false, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset); return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); @@ -1554,7 +1570,8 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const bool LocalDynamic = GV->hasInternalLinkage(); unsigned Flag = LocalDynamic ? 
MipsII::MO_TLSLDM :MipsII::MO_TLSGD; SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag); - SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, + GetGlobalReg(DAG, PtrVT), TGA); unsigned PtrSize = PtrVT.getSizeInBits(); IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); @@ -1565,10 +1582,12 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const Entry.Node = Argument; Entry.Ty = PtrTy; Args.push_back(Entry); - + std::pair<SDValue, SDValue> CallResult = LowerCallTo(DAG.getEntryNode(), PtrTy, - false, false, false, false, 0, CallingConv::C, false, true, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, TlsGetAddr, Args, DAG, dl); SDValue Ret = CallResult.first; @@ -1591,7 +1610,8 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const // Initial Exec TLS Model SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_GOTTPREL); - TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), + TGA); Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), TGA, MachinePointerInfo(), false, false, false, 0); @@ -1628,7 +1648,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag); - JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI); + JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), + JTI); HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI, MachinePointerInfo(), false, false, false, 0); JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag); @@ -1671,10 +1692,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), GOTFlag); - CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP); - SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), - CP, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP); + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(), false, + false, false, 0); SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), OFSTFlag); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo); @@ -1696,10 +1717,9 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // memory location argument. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), - MachinePointerInfo(SV), - false, false, 0); + MachinePointerInfo(SV), false, false, 0); } - + // Called if the size of integer registers is large enough to hold the whole // floating point number. 
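// Illustration (standalone C++, not the actual DAG lowering): the function
// defined just below applies when a single integer register can hold the
// whole FP value, and conceptually reduces FCOPYSIGN to this bit twiddling.
#include <cassert>
#include <cstdint>
#include <cstring>

static double copySignViaIntReg(double Mag, double Sgn) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));  // reinterpret the f64 bits as i64
  std::memcpy(&S, &Sgn, sizeof(S));
  const uint64_t SignBit = uint64_t(1) << 63;
  uint64_t R = (M & ~SignBit) | (S & SignBit);  // magnitude of Mag, sign of Sgn
  double Res;
  std::memcpy(&Res, &R, sizeof(Res));
  return Res;
}

int main() {
  assert(copySignViaIntReg(3.5, -1.0) == -3.5);
  assert(copySignViaIntReg(-2.0, 1.0) == 2.0);
  return 0;
}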
static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) { @@ -1750,16 +1770,16 @@ LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) { return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1); } -SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) - const { +SDValue +MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { EVT Ty = Op.getValueType(); assert(Ty == MVT::f32 || Ty == MVT::f64); if (Ty == MVT::f32 || HasMips64) return LowerFCOPYSIGNLargeIntReg(Op, DAG); - else - return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); + + return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); } SDValue MipsTargetLowering:: @@ -1778,8 +1798,8 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { } // TODO: set SType according to the desired memory barrier behavior. -SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op, - SelectionDAG& DAG) const { +SDValue +MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { unsigned SType = 0; DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), @@ -1922,7 +1942,7 @@ static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, assert(Align <= 16 && "Cannot handle alignments larger than 16."); - // If byval is 16-byte aligned, the first arg register must be even. + // If byval is 16-byte aligned, the first arg register must be even. if ((Align == 16) && (FirstIdx % 2)) { State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]); ++FirstIdx; @@ -1934,10 +1954,10 @@ static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, // Allocate space on caller's stack. unsigned Offset = State.AllocateStack(Size, Align); - + if (FirstIdx < 8) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx], - LocVT, LocInfo)); + LocVT, LocInfo)); else State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); @@ -1947,7 +1967,7 @@ static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, #include "MipsGenCallingConv.inc" static void -AnalyzeMips64CallOperands(CCState CCInfo, +AnalyzeMips64CallOperands(CCState &CCInfo, const SmallVectorImpl<ISD::OutputArg> &Outs) { unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { @@ -1959,7 +1979,7 @@ AnalyzeMips64CallOperands(CCState CCInfo, R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); else R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - + if (R) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " @@ -2007,9 +2027,8 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, - MachinePointerInfo(), - false, false, false, std::min(ByValAlign, - (unsigned )4)); + MachinePointerInfo(), false, false, false, + std::min(ByValAlign, (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); @@ -2045,7 +2064,7 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, // Read second subword if necessary. 
if (RemainingSize != 0) { assert(RemainingSize == 1 && "There must be one byte remaining."); - LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); unsigned Alignment = std::min(ByValAlign, (unsigned )2); SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, @@ -2111,7 +2130,7 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); } - // Return if the struct has been fully copied. + // Return if the struct has been fully copied. if (!(MemCpySize = ByValSize - Offset)) return; @@ -2126,10 +2145,10 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, if (RemSize < LoadSize) continue; - + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = + SDValue LoadVal = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr, MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), false, false, Alignment); @@ -2140,13 +2159,13 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8); SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal, DAG.getConstant(Shamt, MVT::i32)); - + Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) : Shift; Offset += LoadSize; Alignment = std::min(Alignment, LoadSize); } - + RegsToPass.push_back(std::make_pair(*Reg, Val)); return; } @@ -2172,7 +2191,7 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, SDValue MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2190,7 +2209,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); @@ -2212,7 +2231,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. - if (IsO32 && IsPIC && !MipsFI->getGPFI()) + if (IsO32 && IsPIC && MipsFI->globalBaseRegFixed() && !MipsFI->getGPFI()) MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true)); // Get the frame index of the stack frame object that points to the location @@ -2266,11 +2285,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, Subtarget->isLittle()); else PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, - MFI, DAG, Arg, VA, Flags, getPointerTy(), + MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; } - + // Promote the value if needed. 
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -2286,7 +2305,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); - unsigned LocRegLo = VA.getLocReg(); + unsigned LocRegLo = VA.getLocReg(); unsigned LocRegHigh = getNextIntArgReg(LocRegLo); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); @@ -2323,8 +2342,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), - false, false, 0)); + MachinePointerInfo(), false, false, 0)); } // Extend range of indices of frame objects for outgoing arguments that were @@ -2376,8 +2394,8 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, OpFlag = MipsII::MO_NO_FLAG; else // O32 & PIC OpFlag = MipsII::MO_GOT_CALL; - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), - getPointerTy(), OpFlag); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), + OpFlag); GlobalOrExternal = true; } @@ -2387,7 +2405,8 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, if (IsPICCall) { if (GlobalOrExternal) { // Load callee address - Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee); + Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), + GetGlobalReg(DAG, getPointerTy()), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); @@ -2401,7 +2420,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } } - // T9 should contain the address of the callee function if + // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { // copy to T9 @@ -2436,6 +2455,12 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2542,7 +2567,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, false, 0); OutChains.push_back(Store); } - + return LastFI; } @@ -2552,8 +2577,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> - &Ins, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2569,7 +2593,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. 
SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); if (IsO32) CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); @@ -2605,7 +2629,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (IsRegLoc) { EVT RegVT = VA.getLocVT(); unsigned ArgReg = VA.getLocReg(); - TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC; if (RegVT == MVT::i32) RC = Mips::CPURegsRegisterClass; @@ -2688,7 +2712,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. - TargetRegisterClass *RC + const TargetRegisterClass *RC = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass; unsigned RegSize = RC->getSize(); int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; @@ -2719,8 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, - MachinePointerInfo(), - false, false, 0)); + MachinePointerInfo(), false, false, 0)); } } @@ -2774,8 +2797,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - OutVals[i], Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad @@ -2832,7 +2854,6 @@ getConstraintType(const std::string &Constraint) const case 'y': case 'f': return C_RegisterClass; - break; } } return TargetLowering::getConstraintType(Constraint); @@ -2880,14 +2901,19 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. 
case 'r': - return std::make_pair(0U, Mips::CPURegsRegisterClass); + if (VT == MVT::i32) + return std::make_pair(0U, Mips::CPURegsRegisterClass); + assert(VT == MVT::i64 && "Unexpected type."); + return std::make_pair(0U, Mips::CPU64RegsRegisterClass); case 'f': if (VT == MVT::f32) return std::make_pair(0U, Mips::FGR32RegisterClass); - if (VT == MVT::f64) - if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) + if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) { + if (Subtarget->isFP64bit()) + return std::make_pair(0U, Mips::FGR64RegisterClass); + else return std::make_pair(0U, Mips::AFGR64RegisterClass); - break; + } } } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); @@ -2906,3 +2932,10 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; return Imm.isZero(); } + +unsigned MipsTargetLowering::getJumpTableEncoding() const { + if (IsN64) + return MachineJumpTableInfo::EK_GPRel64BlockAddress; + + return TargetLowering::getJumpTableEncoding(); +} diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 81d093f..621bbec 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -109,7 +109,7 @@ namespace llvm { private: // Subtarget Info const MipsSubtarget *Subtarget; - + bool HasMips64, IsN64, IsO32; // Lower Operand helpers @@ -144,7 +144,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -181,6 +181,8 @@ namespace llvm { /// materialize the FP immediate as a load from a constant pool. virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + virtual unsigned getJumpTableEncoding() const; + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 7638f54..fe5eaec 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -1,4 +1,4 @@ -//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===// +//===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -59,6 +59,15 @@ def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">; def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; +// FP immediate patterns. +def fpimm0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +def fpimm0neg : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates // @@ -74,19 +83,35 @@ def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; //===----------------------------------------------------------------------===// // FP load. 
-class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, - Operand MemOpnd>: +class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs RC:$ft), (ins MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (FOp addr:$addr))], + !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))], IILoad>; // FP store. -class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC, - Operand MemOpnd>: +class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)], + !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)], IIStore>; +// FP indexed load. +class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, + RegisterClass PRC, PatFrag FOp>: + FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), + !strconcat(opstr, "\t$fd, $index($base)"), + [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { + let fs = 0; +} + +// FP indexed store. +class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, + RegisterClass PRC, PatFrag FOp>: + FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), + !strconcat(opstr, "\t$fs, $index($base)"), + [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { + let fd = 0; +} + // Instructions that convert an FP value to 32-bit fixed point. multiclass FFR1_W_M<bits<6> funct, string opstr> { def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>; @@ -122,6 +147,19 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { } } +// FP madd/msub/nmadd/nmsub instruction classes. +class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, + SDNode OpNode, RegisterClass RC> : + FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"), + [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>; + +class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, + SDNode OpNode, RegisterClass RC> : + FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), + !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"), + [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>; + //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// @@ -199,23 +237,53 @@ def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, /// Floating Point Memory Instructions let Predicates = [IsN64] in { - def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>; - def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>; - def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>; - def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>; + def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>; + def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>; + def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64>; + def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64>; } let Predicates = [NotN64] in { - def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>; - def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>; - let Predicates = [HasMips64] in { - def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>; - def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>; - } - let Predicates = [NotMips64] in { - def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>; - def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>; - } + def LWC1 : FPLoad<0x31, "lwc1", 
FGR32, mem>; + def SWC1 : FPStore<0x39, "swc1", FGR32, mem>; +} + +let Predicates = [NotN64, HasMips64] in { + def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; + def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; +} + +let Predicates = [NotN64, NotMips64] in { + def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>; + def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>; +} + +// Indexed loads and stores. +let Predicates = [HasMips32r2Or64] in { + def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; + def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; + def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; + def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; +} + +let Predicates = [HasMips32r2, NotMips64] in { + def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>; + def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; +} + +let Predicates = [HasMips64, NotN64] in { + def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; + def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; +} + +// n64 +let Predicates = [IsN64] in { + def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; + def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; + def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; + def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; + def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>; + def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; } /// Floating-point Aritmetic @@ -224,6 +292,36 @@ defm FDIV : FFR2P_M<0x03, "div", fdiv>; defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>; defm FSUB : FFR2P_M<0x01, "sub", fsub>; +let Predicates = [HasMips32r2] in { + def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>; + def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>; +} + +let Predicates = [HasMips32r2, NoNaNsFPMath] in { + def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>; + def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>; +} + +let Predicates = [HasMips32r2, NotFP64bit] in { + def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>; + def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>; +} + +let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in { + def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>; + def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>; +} + +let Predicates = [HasMips32r2, IsFP64bit] in { + def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>; + def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>; +} + +let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath] in { + def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>; + def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>; +} + //===----------------------------------------------------------------------===// // Floating Point Branch Codes //===----------------------------------------------------------------------===// @@ -305,14 +403,6 @@ def ExtractElementF64 : //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -def fpimm0neg : PatLeaf<(fpimm), [{ - return N->isExactlyValue(-0.0); -}]>; - def : Pat<(f32 fpimm0), (MTC1 ZERO)>; def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; @@ -337,8 +427,22 @@ let Predicates = [IsFP64bit] in { (CVT_D64_L (DMTC1 CPU64Regs:$src))>; def : Pat<(i32 
(fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; -}
\ No newline at end of file +} + +// Patterns for unaligned floating point loads and stores. +let Predicates = [HasMips32r2Or64, NotN64] in { + def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : Pat<(store_u FGR32:$src, CPURegs:$addr), + (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; +} + +let Predicates = [IsN64] in { + def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), + (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; +} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 21a1862..4555303 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -1,4 +1,4 @@ -//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===// +//===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -290,3 +290,40 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr, FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft), !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"), [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>; + +// Floating point madd/msub/nmadd/nmsub. +class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, + list<dag> pattern> + : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { + bits<5> fd; + bits<5> fr; + bits<5> fs; + bits<5> ft; + + let Opcode = 0x13; + let Inst{25-21} = fr; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-3} = funct; + let Inst{2-0} = fmt; +} + +// FP indexed load/store instructions. +class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr, + list<dag> pattern> : + MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> +{ + bits<5> base; + bits<5> index; + bits<5> fs; + bits<5> fd; + + let Opcode = 0x13; + + let Inst{25-21} = base; + let Inst{20-16} = index; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = funct; +} diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index ea101f7..a3a18bf 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===// +//===-- MipsInstrInfo.cpp - Mips Instruction Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -32,7 +32,7 @@ MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this), UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J) {} -const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { +const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { return RI; } @@ -157,7 +157,7 @@ copyPhysReg(MachineBasicBlock &MBB, assert(Opc && "Cannot copy registers"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); - + if (DestReg) MIB.addReg(DestReg, RegState::Define); @@ -168,6 +168,16 @@ copyPhysReg(MachineBasicBlock &MBB, MIB.addReg(SrcReg, getKillRegState(KillSrc)); } +static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag, + MFI.getObjectSize(FI), Align); +} + void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, @@ -175,6 +185,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) @@ -190,7 +202,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } void MipsInstrInfo:: @@ -201,6 +213,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; if (RC == Mips::CPURegsRegisterClass) @@ -215,7 +228,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); } MachineInstr* @@ -246,21 +260,21 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) { unsigned Mips::GetOppositeBranchOpc(unsigned Opc) { switch (Opc) { - default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ : return Mips::BNE; - case Mips::BNE : return Mips::BEQ; - case Mips::BGTZ : return Mips::BLEZ; - case Mips::BGEZ : return Mips::BLTZ; - case Mips::BLTZ : return Mips::BGEZ; - case Mips::BLEZ : return Mips::BGTZ; - case Mips::BEQ64 : return Mips::BNE64; - case Mips::BNE64 : return Mips::BEQ64; - case Mips::BGTZ64 : return Mips::BLEZ64; - case Mips::BGEZ64 : return Mips::BLTZ64; - case Mips::BLTZ64 : return Mips::BGEZ64; - case Mips::BLEZ64 : return Mips::BGTZ64; - case Mips::BC1T : return Mips::BC1F; - case Mips::BC1F : return Mips::BC1T; + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ: return Mips::BNE; + case Mips::BNE: return Mips::BEQ; + case Mips::BGTZ: return Mips::BLEZ; + case Mips::BGEZ: return Mips::BLTZ; + case Mips::BLTZ: return Mips::BGEZ; + case Mips::BLEZ: return Mips::BGTZ; + case Mips::BEQ64: return Mips::BNE64; + case Mips::BNE64: return Mips::BEQ64; + case Mips::BGTZ64: return Mips::BLEZ64; + case Mips::BGEZ64: return Mips::BLTZ64; + case Mips::BLTZ64: return Mips::BGEZ64; + case Mips::BLEZ64: return Mips::BGTZ64; + case Mips::BC1T: return Mips::BC1F; + case Mips::BC1F: return Mips::BC1T; } } @@ -269,7 +283,7 @@ static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, SmallVectorImpl<MachineOperand>& Cond) { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); - + // for both int and fp branches, the last explicit operand is the // MBB. BB = Inst->getOperand(NumOp-1).getMBB(); @@ -357,8 +371,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, FBB = LastInst->getOperand(0).getMBB(); return false; -} - +} + void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, const SmallVectorImpl<MachineOperand>& Cond) @@ -440,27 +454,3 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const return false; } -/// getGlobalBaseReg - Return a virtual register initialized with the -/// the global base register value. Output instructions required to -/// initialize the register in the function entry block, if necessary. 
-/// -unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { - MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg(); - if (GlobalBaseReg != 0) - return GlobalBaseReg; - - // Insert the set of GlobalBaseReg into the first MBB of the function - MachineBasicBlock &FirstMBB = MF->front(); - MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass); - BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), - GlobalBaseReg).addReg(Mips::GP); - RegInfo.addLiveIn(Mips::GP); - - MipsFI->setGlobalBaseReg(GlobalBaseReg); - return GlobalBaseReg; -} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 70cc2cf..10caf30 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -1,4 +1,4 @@ -//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===// +//===-- MipsInstrInfo.h - Mips Instruction Information ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -103,12 +103,6 @@ public: /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - - /// getGlobalBaseReg - Return a virtual register initialized with the - /// the global base register value. Output instructions required to - /// initialize the register in the function entry block, if necessary. - /// - unsigned getGlobalBaseReg(MachineFunction *MF) const; }; } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 9fcc5fd..bc85fa6 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -103,11 +103,11 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, // target constant nodes that would otherwise remain unchanged with ADDiu // nodes. Without these wrapper node patterns, the following conditional move // instrucion is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is -// compiled: +// compiled: // movn %got(d)($gp), %got(c)($gp), $4 // This instruction is illegal since movn can take only register operands. -def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>; +def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>; // Pointer to dynamically allocated stack area. def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, @@ -128,12 +128,14 @@ def HasCondMov : Predicate<"Subtarget.hasCondMov()">; def HasMips32 : Predicate<"Subtarget.hasMips32()">; def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">; def HasMips64 : Predicate<"Subtarget.hasMips64()">; +def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">; def NotMips64 : Predicate<"!Subtarget.hasMips64()">; def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; def IsN64 : Predicate<"Subtarget.isABI_N64()">; def NotN64 : Predicate<"!Subtarget.isABI_N64()">; def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
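The immediate leaves in the next hunk (immSExt16, immZExt16, and the new immLow16Zero) each correspond to a single-instruction materialization of a 32-bit constant. A rough model of the relevant MIPS instruction semantics, for illustration only:

#include <cassert>
#include <cstdint>

// lui rt, imm16:       rt = imm16 << 16
// ori rt, rs, imm16:   rt = rs | zext(imm16)
// addiu rt, rs, imm16: rt = rs + sext(imm16)
static uint32_t lui(uint16_t Imm) { return uint32_t(Imm) << 16; }
static uint32_t ori(uint32_t Rs, uint16_t Imm) { return Rs | Imm; }
static uint32_t addiu(uint32_t Rs, int16_t Imm) {
  return Rs + uint32_t(int32_t(Imm));
}

int main() {
  assert(addiu(0, -4) == 0xfffffffcu);             // immSExt16: ADDiu from $zero
  assert(ori(0, 0xbeef) == 0x0000beefu);           // immZExt16: ORi from $zero
  assert(lui(0x0012) == 0x00120000u);              // immLow16Zero: a single LUi
  assert(ori(lui(0x0012), 0x3456) == 0x00123456u); // general case: LUi + ORi
  return 0;
}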
@@ -219,34 +221,42 @@ def immZExt16 : PatLeaf<(imm), [{ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); }], LO16>; +// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared). +def immLow16Zero : PatLeaf<(imm), [{ + int64_t Val = N->getSExtValue(); + return isInt<32>(Val) && !(Val & 0xffff); +}]>; + // shamt field must fit in 5 bits. def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>; // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>; //===----------------------------------------------------------------------===// // Pattern fragment for load/store //===----------------------------------------------------------------------===// -class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ +class UnalignedLoad<PatFrag Node> : + PatFrag<(ops node:$ptr), (Node node:$ptr), [{ LoadSDNode *LD = cast<LoadSDNode>(N); return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment(); }]>; -class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{ +class AlignedLoad<PatFrag Node> : + PatFrag<(ops node:$ptr), (Node node:$ptr), [{ LoadSDNode *LD = cast<LoadSDNode>(N); return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment(); }]>; -class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), - (Node node:$val, node:$ptr), [{ +class UnalignedStore<PatFrag Node> : + PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{ StoreSDNode *SD = cast<StoreSDNode>(N); return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment(); }]>; -class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr), - (Node node:$val, node:$ptr), [{ +class AlignedStore<PatFrag Node> : + PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{ StoreSDNode *SD = cast<StoreSDNode>(N); return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment(); }]>; @@ -397,7 +407,7 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, Requires<[IsN64]>; -} +} // 64-bit load. multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, @@ -406,7 +416,7 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, Requires<[IsN64]>; -} +} // 32-bit load. multiclass LoadUnAlign32<bits<6> op> { @@ -487,7 +497,7 @@ class JumpFJ<bits<6> op, string instr_asm>: let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; - let Predicates = [RelocStatic]; + let Predicates = [RelocStatic]; } // Unconditional branch @@ -500,7 +510,7 @@ class UncondBranch<bits<6> op, string instr_asm>: let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 1; - let Predicates = [RelocPIC]; + let Predicates = [RelocPIC]; } let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, @@ -514,26 +524,26 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: } // Jump and Link (Call) -let isCall=1, hasDelaySlot=1, - // All calls clobber the non-callee saved registers... 
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, - K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { +let isCall=1, hasDelaySlot=1 in { class JumpLink<bits<6> op, string instr_asm>: FJ<op, (outs), (ins calltarget:$target, variable_ops), !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], IIBranch>; - class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPURegs:$rs, variable_ops), - !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> { + class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm, + RegisterClass RC>: + FR<op, func, (outs), (ins RC:$rs, variable_ops), + !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> { let rt = 0; let rd = 31; let shamt = 0; } - class BranchLink<string instr_asm>: - FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$imm16, variable_ops), - !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>; + class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>: + FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16, variable_ops), + !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> { + let rt = _rt; + } } // Mul, Div @@ -608,20 +618,20 @@ class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>: } // Sign Extend in Register. -class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>: - FR<0x1f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt), +class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt, + RegisterClass RC>: + FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt), !strconcat(instr_asm, "\t$rd, $rt"), - [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> { + [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> { let rs = 0; let shamt = sa; let Predicates = [HasSEInReg]; } -// Byte Swap -class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>: - FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt), - !strconcat(instr_asm, "\t$rd, $rt"), - [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> { +// Subword Swap +class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>: + FR<0x1f, func, (outs RC:$rd), (ins RC:$rt), + !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> { let rs = 0; let shamt = sa; let Predicates = [HasSwap]; @@ -637,7 +647,7 @@ class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> // Ext and Ins class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>: - FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz), + FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz), !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> { bits<5> pos; @@ -731,6 +741,26 @@ def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; +// For O32 ABI & PIC & non-fixed global base register, the following instruction +// sequence is emitted to set the global base register: +// +// 0. lui $2, %hi(_gp_disp) +// 1. addiu $2, $2, %lo(_gp_disp) +// 2. addu $globalbasereg, $2, $t9 +// +// SETGP01 is emitted during Prologue/Epilogue insertion and then converted to +// instructions 0 and 1 in the sequence above during MC lowering. +// SETGP2 is emitted just before register allocation and converted to +// instruction 2 just prior to post-RA scheduling. 
+// +// These pseudo instructions are needed to ensure no instructions are inserted +// before or between instructions 0 and 1, which is a limitation imposed by +// GNU linker. + +def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>; +def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "", + []>; + let usesCustomInserter = 1 in { defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">; @@ -848,8 +878,6 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; -def JAL : JumpLink<0x03, "jal">; -def JALR : JumpLinkReg<0x00, 0x09, "jalr">; def B : UncondBranch<0x04, "b">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; def BNE : CBranch<0x05, "bne", setne, CPURegs>; @@ -858,10 +886,10 @@ def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>; def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>; def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>; -let rt=0x11 in - def BGEZAL : BranchLink<"bgezal">; -let rt=0x10 in - def BLTZAL : BranchLink<"bltzal">; +def JAL : JumpLink<0x03, "jal">; +def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; +def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; +def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in @@ -880,15 +908,15 @@ def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>; def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>; /// Sign Ext In Register Instructions. -def SEB : SignExtInReg<0x10, "seb", i8>; -def SEH : SignExtInReg<0x18, "seh", i16>; +def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>; +def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>; /// Count Leading def CLZ : CountLeading0<0x20, "clz", CPURegs>; def CLO : CountLeading1<0x21, "clo", CPURegs>; -/// Byte Swap -def WSBW : ByteSwap<0x20, 0x2, "wsbw">; +/// Word Swap Bytes Within Halfwords +def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>; /// No operation let addr=0 in @@ -931,6 +959,8 @@ def : Pat<(i32 immSExt16:$in), (ADDiu ZERO, imm:$in)>; def : Pat<(i32 immZExt16:$in), (ORi ZERO, imm:$in)>; +def : Pat<(i32 immLow16Zero:$in), + (LUi (HI16 imm:$in))>; // Arbitrary immediates def : Pat<(i32 imm:$imm), @@ -983,29 +1013,44 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), (ADDiu CPURegs:$gp, tconstpool:$in)>; // wrapper_pic -class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>: - Pat<(MipsWrapper node:$in), - (ADDiuOp GPReg, node:$in)>; +class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>: + Pat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; -def : WrapperPat<tglobaladdr, ADDiu, GP>; -def : WrapperPat<tconstpool, ADDiu, GP>; -def : WrapperPat<texternalsym, ADDiu, GP>; -def : WrapperPat<tblockaddress, ADDiu, GP>; -def : WrapperPat<tjumptable, ADDiu, GP>; -def : WrapperPat<tglobaltlsaddr, ADDiu, GP>; +def : WrapperPat<tglobaladdr, ADDiu, CPURegs>; +def : WrapperPat<tconstpool, ADDiu, CPURegs>; +def : WrapperPat<texternalsym, ADDiu, CPURegs>; +def : WrapperPat<tblockaddress, ADDiu, CPURegs>; +def : WrapperPat<tjumptable, ADDiu, CPURegs>; +def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>; // Mips does not have "not", so we expand our way def : Pat<(not CPURegs:$in), (NOR CPURegs:$in, ZERO)>; -// extended load and stores -def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>; -def : Pat<(extloadi8 
addr:$src), (LBu addr:$src)>; -def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>; -def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>; +// extended loads +let Predicates = [NotN64] in { + def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; + def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; + def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; + def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; +} +let Predicates = [IsN64] in { + def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; + def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; + def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; + def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; +} // peepholes -def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; +let Predicates = [NotN64] in { + def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; + def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; +} +let Predicates = [IsN64] in { + def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; + def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; +} // brcond patterns multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp, @@ -1083,6 +1128,9 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // select MipsDynAlloc def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; +// bswap pattern +def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index a0ee722..76ca3e1 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -1,4 +1,4 @@ -//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===// +//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===// // // The LLVM Compiler Infrastructure // @@ -200,7 +200,7 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); switch ((Mips::RelocationType) MR->getRelocationType()) { - case Mips::reloc_mips_branch: + case Mips::reloc_mips_pc16: ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff; *((unsigned*) RelocPos) |= (unsigned) ResultPtr; break; @@ -228,9 +228,6 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, *((unsigned*) RelocPos) |= (unsigned) ResultPtr; break; } - - default: - llvm_unreachable("ERROR: Unknown Mips relocation."); } } } diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index 41f32a3..ad3c930 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -1,4 +1,4 @@ -//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===// +//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 23486d3..be65298 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -37,26 +37,26 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, const MCSymbol *Symbol; switch(MO.getTargetFlags()) { - default: assert(0 && "Invalid target flag!"); - case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break; - case MipsII::MO_GPREL: Kind 
= MCSymbolRefExpr::VK_Mips_GPREL; break; - case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break; - case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break; - case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break; - case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break; - case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break; - case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break; - case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break; - case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break; - case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break; - case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break; - case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break; - case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break; - case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break; - case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break; - case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break; - case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break; - case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break; + default: llvm_unreachable("Invalid target flag!"); + case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break; + case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break; + case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break; + case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break; + case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break; + case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break; + case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break; + case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break; + case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break; + case MipsII::MO_DTPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break; + case MipsII::MO_DTPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break; + case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break; + case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break; + case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break; + case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break; + case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break; + case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break; + case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break; + case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break; } switch (MOTy) { @@ -89,7 +89,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, default: llvm_unreachable("<unknown operand type>"); } - + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); if (!Offset) @@ -97,7 +97,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. 
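An aside on the WSBH-based bswap pattern added to MipsInstrInfo.td earlier: swapping the bytes within each halfword and then rotating the word by 16 bits is a full 32-bit byte swap, which is why (bswap $rt) can be selected as (ROTR (WSBH $rt), 16). A standalone check of that identity (plain C++, not part of the patch; wsbh here is just a model of the instruction):

    #include <cassert>
    #include <cstdint>

    // Model of WSBH: swap the two bytes inside each 16-bit half.
    static uint32_t wsbh(uint32_t x) {
      return ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
    }

    int main() {
      uint32_t x = 0x11223344u;
      // ROTR by 16 then exchanges the two halfwords.
      uint32_t rot = (wsbh(x) >> 16) | (wsbh(x) << 16);
      assert(rot == 0x44332211u); // == bswap(x)
      return 0;
    }

The symbol-plus-offset lowering resumes below.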
assert(Offset > 0); - + const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, Ctx); return MCOperand::CreateExpr(AddExpr); @@ -148,7 +148,7 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst Sw; if (Offset >= 0x8000) { - unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); + unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); Offset &= 0xffff; Reg = Mips::AT; @@ -163,7 +163,7 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP)); } - + Sw.setOpcode(Mips::SW); Sw.addOperand(MCOperand::CreateReg(Mips::GP)); Sw.addOperand(MCOperand::CreateReg(Reg)); @@ -172,13 +172,11 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, } MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, - unsigned offset) const { + unsigned offset) const { MachineOperandType MOTy = MO.getType(); - + switch (MOTy) { - default: - assert(0 && "unknown operand type"); - break; + default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) break; @@ -192,6 +190,8 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_BlockAddress: return LowerSymbolOperand(MO, MOTy, offset); + case MachineOperand::MO_RegisterMask: + break; } return MCOperand(); @@ -199,7 +199,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp = LowerOperand(MO); @@ -210,114 +210,140 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI, - SmallVector<MCInst, - 4>& MCInsts) { + SmallVector<MCInst, + 4>& MCInsts) { unsigned Opc = MI->getOpcode(); - MCInst instr1, instr2, instr3, move; + MCInst Instr1, Instr2, Instr3, Move; - bool two_instructions = false; + bool TwoInstructions = false; assert(MI->getNumOperands() == 3); assert(MI->getOperand(0).isReg()); assert(MI->getOperand(1).isReg()); - MCOperand target = LowerOperand(MI->getOperand(0)); - MCOperand base = LowerOperand(MI->getOperand(1)); - MCOperand atReg = MCOperand::CreateReg(Mips::AT); - MCOperand zeroReg = MCOperand::CreateReg(Mips::ZERO); + MCOperand Target = LowerOperand(MI->getOperand(0)); + MCOperand Base = LowerOperand(MI->getOperand(1)); + MCOperand ATReg = MCOperand::CreateReg(Mips::AT); + MCOperand ZeroReg = MCOperand::CreateReg(Mips::ZERO); - MachineOperand unloweredName = MI->getOperand(2); - MCOperand name = LowerOperand(unloweredName); + MachineOperand UnLoweredName = MI->getOperand(2); + MCOperand Name = LowerOperand(UnLoweredName); - move.setOpcode(Mips::ADDu); - move.addOperand(target); - move.addOperand(atReg); - move.addOperand(zeroReg); + Move.setOpcode(Mips::ADDu); + Move.addOperand(Target); + Move.addOperand(ATReg); + Move.addOperand(ZeroReg); switch (Opc) { case Mips::ULW: { // FIXME: only works for little endian right now - MCOperand adj_name = LowerOperand(unloweredName, 3); - if (base.getReg() == (target.getReg())) { - instr1.setOpcode(Mips::LWL); - instr1.addOperand(atReg); - instr1.addOperand(base); - 
instr1.addOperand(adj_name); - instr2.setOpcode(Mips::LWR); - instr2.addOperand(atReg); - instr2.addOperand(base); - instr2.addOperand(name); - instr3 = move; + MCOperand AdjName = LowerOperand(UnLoweredName, 3); + if (Base.getReg() == (Target.getReg())) { + Instr1.setOpcode(Mips::LWL); + Instr1.addOperand(ATReg); + Instr1.addOperand(Base); + Instr1.addOperand(AdjName); + Instr2.setOpcode(Mips::LWR); + Instr2.addOperand(ATReg); + Instr2.addOperand(Base); + Instr2.addOperand(Name); + Instr3 = Move; } else { - two_instructions = true; - instr1.setOpcode(Mips::LWL); - instr1.addOperand(target); - instr1.addOperand(base); - instr1.addOperand(adj_name); - instr2.setOpcode(Mips::LWR); - instr2.addOperand(target); - instr2.addOperand(base); - instr2.addOperand(name); + TwoInstructions = true; + Instr1.setOpcode(Mips::LWL); + Instr1.addOperand(Target); + Instr1.addOperand(Base); + Instr1.addOperand(AdjName); + Instr2.setOpcode(Mips::LWR); + Instr2.addOperand(Target); + Instr2.addOperand(Base); + Instr2.addOperand(Name); } break; } case Mips::ULHu: { // FIXME: only works for little endian right now - MCOperand adj_name = LowerOperand(unloweredName, 1); - instr1.setOpcode(Mips::LBu); - instr1.addOperand(atReg); - instr1.addOperand(base); - instr1.addOperand(adj_name); - instr2.setOpcode(Mips::LBu); - instr2.addOperand(target); - instr2.addOperand(base); - instr2.addOperand(name); - instr3.setOpcode(Mips::INS); - instr3.addOperand(target); - instr3.addOperand(atReg); - instr3.addOperand(MCOperand::CreateImm(0x8)); - instr3.addOperand(MCOperand::CreateImm(0x18)); + MCOperand AdjName = LowerOperand(UnLoweredName, 1); + Instr1.setOpcode(Mips::LBu); + Instr1.addOperand(ATReg); + Instr1.addOperand(Base); + Instr1.addOperand(AdjName); + Instr2.setOpcode(Mips::LBu); + Instr2.addOperand(Target); + Instr2.addOperand(Base); + Instr2.addOperand(Name); + Instr3.setOpcode(Mips::INS); + Instr3.addOperand(Target); + Instr3.addOperand(ATReg); + Instr3.addOperand(MCOperand::CreateImm(0x8)); + Instr3.addOperand(MCOperand::CreateImm(0x18)); break; } case Mips::USW: { // FIXME: only works for little endian right now - assert (base.getReg() != target.getReg()); - two_instructions = true; - MCOperand adj_name = LowerOperand(unloweredName, 3); - instr1.setOpcode(Mips::SWL); - instr1.addOperand(target); - instr1.addOperand(base); - instr1.addOperand(adj_name); - instr2.setOpcode(Mips::SWR); - instr2.addOperand(target); - instr2.addOperand(base); - instr2.addOperand(name); + assert (Base.getReg() != Target.getReg()); + TwoInstructions = true; + MCOperand AdjName = LowerOperand(UnLoweredName, 3); + Instr1.setOpcode(Mips::SWL); + Instr1.addOperand(Target); + Instr1.addOperand(Base); + Instr1.addOperand(AdjName); + Instr2.setOpcode(Mips::SWR); + Instr2.addOperand(Target); + Instr2.addOperand(Base); + Instr2.addOperand(Name); break; } case Mips::USH: { - MCOperand adj_name = LowerOperand(unloweredName, 1); - instr1.setOpcode(Mips::SB); - instr1.addOperand(target); - instr1.addOperand(base); - instr1.addOperand(name); - instr2.setOpcode(Mips::SRL); - instr2.addOperand(atReg); - instr2.addOperand(target); - instr2.addOperand(MCOperand::CreateImm(8)); - instr3.setOpcode(Mips::SB); - instr3.addOperand(atReg); - instr3.addOperand(base); - instr3.addOperand(adj_name); + MCOperand AdjName = LowerOperand(UnLoweredName, 1); + Instr1.setOpcode(Mips::SB); + Instr1.addOperand(Target); + Instr1.addOperand(Base); + Instr1.addOperand(Name); + Instr2.setOpcode(Mips::SRL); + Instr2.addOperand(ATReg); + Instr2.addOperand(Target); + 
Instr2.addOperand(MCOperand::CreateImm(8)); + Instr3.setOpcode(Mips::SB); + Instr3.addOperand(ATReg); + Instr3.addOperand(Base); + Instr3.addOperand(AdjName); break; } default: // FIXME: need to add others - assert(0 && "unaligned instruction not processed"); + llvm_unreachable("unaligned instruction not processed"); } - MCInsts.push_back(instr1); - MCInsts.push_back(instr2); - if (!two_instructions) MCInsts.push_back(instr3); + MCInsts.push_back(Instr1); + MCInsts.push_back(Instr2); + if (!TwoInstructions) MCInsts.push_back(Instr3); } +// Convert +// "setgp01 $reg" +// to +// "lui $reg, %hi(_gp_disp)" +// "addiu $reg, $reg, %lo(_gp_disp)" +void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI, + SmallVector<MCInst, 4>& MCInsts) { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isReg()); + MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg()); + StringRef SymName("_gp_disp"); + const MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + const MCSymbolRefExpr *MCSym; + + MCInsts.resize(2); + + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); + MCInsts[0].setOpcode(Mips::LUi); + MCInsts[0].addOperand(RegOpnd); + MCInsts[0].addOperand(MCOperand::CreateExpr(MCSym)); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); + MCInsts[1].setOpcode(Mips::ADDiu); + MCInsts[1].addOperand(RegOpnd); + MCInsts[1].addOperand(RegOpnd); + MCInsts[1].addOperand(MCOperand::CreateExpr(MCSym)); +} diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 1490c14..cbd5264 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -1,4 +1,4 @@ -//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==// +//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -23,7 +23,7 @@ namespace llvm { class MachineFunction; class Mangler; class MipsAsmPrinter; - + /// MipsMCInstLower - This class is used to lower an MachineInstr into an // MCInst. class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { @@ -33,12 +33,13 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { MipsAsmPrinter &AsmPrinter; public: MipsMCInstLower(Mangler *mang, const MachineFunction &MF, - MipsAsmPrinter &asmprinter); + MipsAsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); - void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); + void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); + void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp new file mode 100644 index 0000000..b00c62b --- /dev/null +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -0,0 +1,50 @@ +//===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsMachineFunction.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> +FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true), + cl::desc("Always use $gp as the global base register.")); + +bool MipsFunctionInfo::globalBaseRegFixed() const { + return FixGlobalBaseReg; +} + +bool MipsFunctionInfo::globalBaseRegSet() const { + return GlobalBaseReg; +} + +unsigned MipsFunctionInfo::getGlobalBaseReg() { + // Return if it has already been initialized. + if (GlobalBaseReg) + return GlobalBaseReg; + + const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>(); + + if (FixGlobalBaseReg) // $gp is the global base register. + return GlobalBaseReg = ST.isABI_N64() ? Mips::GP_64 : Mips::GP; + + const TargetRegisterClass *RC; + RC = ST.isABI_N64() ? + Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass; + + return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC); +} + +void MipsFunctionInfo::anchor() { } diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index bc30b6b..57ff069 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -16,7 +16,6 @@ #include <utility> #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,8 +24,8 @@ namespace llvm { /// MipsFunctionInfo - This class is derived from MachineFunction private /// Mips target-specific information for each MachineFunction. class MipsFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); -private: MachineFunction& MF; /// SRetReturnReg - Some subtargets require that sret lowering includes /// returning the value of the returned struct in a register. This field @@ -45,10 +44,10 @@ private: // InArgFIRange: Range of indices of all frame objects created during call to // LowerFormalArguments. // OutArgFIRange: Range of indices of all frame objects created during call to - // LowerCall except for the frame object for restoring $gp. + // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; - int GPFI; // Index of the frame object for restoring $gp - mutable int DynAllocFI; // Frame index of dynamically allocated stack area. + int GPFI; // Index of the frame object for restoring $gp + mutable int DynAllocFI; // Frame index of dynamically allocated stack area. 
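The new accessors above replace the old get/setGlobalBaseReg pair with a lazy scheme: globalBaseRegFixed() mirrors the -mips-fix-global-base-reg option, globalBaseRegSet() reports whether a register has been chosen yet, and getGlobalBaseReg() creates a virtual register on first use when $gp is not dedicated. A caller's-eye sketch of the intended interplay (the surrounding pass is hypothetical, not from this patch):

    // Sketch only: MF is the MachineFunction being processed.
    MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();

    if (MipsFI->globalBaseRegFixed()) {
      // Default: $gp/$gp_64 is dedicated (and reserved in getReservedRegs
      // later in this patch), so this always names the physical register.
      unsigned GP = MipsFI->getGlobalBaseReg();
      (void)GP;
    } else if (!MipsFI->globalBaseRegSet()) {
      // First query creates a fresh CPURegs/CPU64Regs virtual register;
      // SETGP2 will define it before register allocation.
      unsigned VReg = MipsFI->getGlobalBaseReg();
      (void)VReg;
    }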
unsigned MaxCallFrameSize; public: @@ -64,7 +63,7 @@ public: } void setLastInArgFI(int FI) { InArgFIRange.second = FI; } - bool isOutArgFI(int FI) const { + bool isOutArgFI(int FI) const { return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second; } void extendOutArgFIRange(int FirstFI, int LastFI) { @@ -92,8 +91,9 @@ public: unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } - unsigned getGlobalBaseReg() const { return GlobalBaseReg; } - void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + bool globalBaseRegFixed() const; + bool globalBaseRegSet() const; + unsigned getGlobalBaseReg(); int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index e5a0f08..e0ecba2 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.cpp - MIPS Register Information -== -----*- C++ -*-===// +//===-- MipsRegisterInfo.cpp - MIPS Register Information -== --------------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,7 @@ #define DEBUG_TYPE "mips-reg-info" #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsSubtarget.h" #include "MipsRegisterInfo.h" #include "MipsMachineFunction.h" @@ -45,98 +46,6 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) : MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {} -/// getRegisterNumbering - Given the enum value for some register, e.g. -/// Mips::RA, return the number that it corresponds to (e.g. 31). -unsigned MipsRegisterInfo:: -getRegisterNumbering(unsigned RegEnum) -{ - switch (RegEnum) { - case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64: - case Mips::D0: - return 0; - case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64: - return 1; - case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64: - case Mips::D1: - return 2; - case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64: - return 3; - case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64: - case Mips::D2: - return 4; - case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64: - return 5; - case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64: - case Mips::D3: - return 6; - case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64: - return 7; - case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64: - case Mips::D4: - return 8; - case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64: - return 9; - case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64: - case Mips::D5: - return 10; - case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64: - return 11; - case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64: - case Mips::D6: - return 12; - case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64: - return 13; - case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64: - case Mips::D7: - return 14; - case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64: - return 15; - case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64: - case Mips::D8: - return 16; - case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64: - return 17; - case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64: - case Mips::D9: - return 18; - 
case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64: - return 19; - case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64: - case Mips::D10: - return 20; - case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64: - return 21; - case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64: - case Mips::D11: - return 22; - case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64: - return 23; - case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64: - case Mips::D12: - return 24; - case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64: - return 25; - case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64: - case Mips::D13: - return 26; - case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64: - return 27; - case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64: - case Mips::D14: - return 28; - case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: - case Mips::HWR29: - return 29; - case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: - case Mips::D15: - return 30; - case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64: - return 31; - default: llvm_unreachable("Unknown register number!"); - } - return 0; // Not reached -} - unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } //===----------------------------------------------------------------------===// @@ -144,60 +53,44 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } //===----------------------------------------------------------------------===// /// Mips Callee Saved Registers -const unsigned* MipsRegisterInfo:: +const uint16_t* MipsRegisterInfo:: getCalleeSavedRegs(const MachineFunction *MF) const { - // Mips callee-save register range is $16-$23, $f20-$f30 - static const unsigned SingleFloatOnlyCalleeSavedRegs[] = { - Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26, - Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20, - Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4, - Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 - }; - - static const unsigned Mips32CalleeSavedRegs[] = { - Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10, - Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4, - Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0 - }; - - static const unsigned N32CalleeSavedRegs[] = { - Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64, - Mips::D21_64, - Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, - Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, - Mips::S0_64, 0 - }; - - static const unsigned N64CalleeSavedRegs[] = { - Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64, - Mips::D26_64, Mips::D25_64, Mips::D24_64, - Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64, - Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64, - Mips::S0_64, 0 - }; - if (Subtarget.isSingleFloat()) - return SingleFloatOnlyCalleeSavedRegs; + return CSR_SingleFloatOnly_SaveList; else if (!Subtarget.hasMips64()) - return Mips32CalleeSavedRegs; + return CSR_O32_SaveList; else if (Subtarget.isABI_N32()) - return N32CalleeSavedRegs; + return CSR_N32_SaveList; assert(Subtarget.isABI_N64()); - return N64CalleeSavedRegs; + return CSR_N64_SaveList; +} + +const uint32_t* +MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const +{ + if (Subtarget.isSingleFloat()) + return CSR_SingleFloatOnly_RegMask; + else if 
(!Subtarget.hasMips64()) + return CSR_O32_RegMask; + else if (Subtarget.isABI_N32()) + return CSR_N32_RegMask; + + assert(Subtarget.isABI_N64()); + return CSR_N64_RegMask; } BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const unsigned ReservedCPURegs[] = { - Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, - Mips::GP, Mips::SP, Mips::FP, Mips::RA + Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, + Mips::SP, Mips::FP, Mips::RA }; static const unsigned ReservedCPU64Regs[] = { - Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, - Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 + Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, + Mips::SP_64, Mips::FP_64, Mips::RA_64 }; BitVector Reserved(getNumRegs()); @@ -225,7 +118,13 @@ getReservedRegs(const MachineFunction &MF) const { Reg != Mips::FGR64RegisterClass->end(); ++Reg) Reserved.set(*Reg); } - + + // If GP is dedicated as a global base register, reserve it. + if (MF.getInfo<MipsFunctionInfo>()->globalBaseRegFixed()) { + Reserved.set(Mips::GP); + Reserved.set(Mips::GP_64); + } + return Reserved; } @@ -260,8 +159,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, errs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); - int stackSize = MF.getFrameInfo()->getStackSize(); - int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + uint64_t stackSize = MF.getFrameInfo()->getStackSize(); + int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" @@ -280,7 +179,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register + // Everything else is referenced relative to whatever register // getFrameRegister() returns. unsigned FrameReg; @@ -288,43 +187,64 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else - FrameReg = getFrameRegister(MF); - + FrameReg = getFrameRegister(MF); + // Calculate final offset. // - There is no need to change the offset if the frame object is one of the // following: an outgoing argument, pointer to a dynamically allocated // stack space or a $gp restore location, // - If the frame object is any of the following, its offset must be adjusted // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - int Offset; + // incoming argument, callee-saved register location or local variable. + int64_t Offset; if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex)) Offset = spOffset; else - Offset = spOffset + stackSize; + Offset = spOffset + (int64_t)stackSize; Offset += MI.getOperand(i+1).getImm(); DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. - if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) { + // field. + if (!MI.isDebugValue() && !isInt<16>(Offset)) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = II->getDebugLoc(); - int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) + - ((Offset & 0x8000) != 0); + MipsAnalyzeImmediate AnalyzeImm; + unsigned Size = Subtarget.isABI_N64() ? 
64 : 32; + unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); // FIXME: change this when mips goes MC". BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); - BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi); - BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg) - .addReg(Mips::AT); - FrameReg = Mips::AT; - Offset = (short)(Offset & 0xffff); + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq except for the last one. + for (++Inst; Inst != Seq.end() - 1; ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(Inst->ImmOpnd); BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); } @@ -344,11 +264,9 @@ getFrameRegister(const MachineFunction &MF) const { unsigned MipsRegisterInfo:: getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); - return 0; } unsigned MipsRegisterInfo:: getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); - return 0; } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 67e57dd..7037ca6 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===// +//===-- MipsRegisterInfo.h - Mips Register Information Impl -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -42,7 +42,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... 
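The eliminateFrameIndex rewrite above deserves a worked example. The deleted code handled an out-of-range offset with a fixed LUi/ADDu pair, manually adding a carry to the high half because the low 16 bits are later consumed as a sign-extended immediate; MipsAnalyzeImmediate generalizes the same idea to 64-bit offsets and longer LUi/ADDiu/ORi/SLL sequences. The carry rule itself, checked standalone (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Offset = 0x12348abc;  // too large for a signed 16-bit field
      // High half, +1 when bit 15 is set -- exactly the deleted ImmHi logic.
      uint32_t Hi = ((uint32_t)Offset >> 16) + (((uint32_t)Offset & 0x8000u) != 0);
      int16_t Lo = (int16_t)(Offset & 0xffff); // sign-extends to -0x7544
      // LUi $at, Hi ; ADDu $at, $frame, $at ; Lo becomes the folded offset.
      assert((int32_t)(Hi << 16) + Lo == Offset);
      return 0;
    }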
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 76ee2e6..ce399a0 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -1,4 +1,4 @@ -//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===// +//===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -50,6 +50,7 @@ class AFPR<bits<5> num, string n, list<Register> subregs> : MipsRegWithSubRegs<n, subregs> { let Num = num; let SubRegIndices = [sub_fpeven, sub_fpodd]; + let CoveredBySubRegs = 1; } class AFPR64<bits<5> num, string n, list<Register> subregs> @@ -68,8 +69,6 @@ class HWR<bits<5> num, string n> : MipsReg<n> { //===----------------------------------------------------------------------===// let Namespace = "Mips" in { - // FIXME: Fix DwarfRegNum. - // General Purpose Registers def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>; def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>; @@ -105,38 +104,38 @@ let Namespace = "Mips" in { def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>; // General Purpose 64-bit Registers - def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>; - def AT_64 : Mips64GPRReg< 1, "AT", [AT]>; - def V0_64 : Mips64GPRReg< 2, "2", [V0]>; - def V1_64 : Mips64GPRReg< 3, "3", [V1]>; - def A0_64 : Mips64GPRReg< 4, "4", [A0]>; - def A1_64 : Mips64GPRReg< 5, "5", [A1]>; - def A2_64 : Mips64GPRReg< 6, "6", [A2]>; - def A3_64 : Mips64GPRReg< 7, "7", [A3]>; - def T0_64 : Mips64GPRReg< 8, "8", [T0]>; - def T1_64 : Mips64GPRReg< 9, "9", [T1]>; - def T2_64 : Mips64GPRReg< 10, "10", [T2]>; - def T3_64 : Mips64GPRReg< 11, "11", [T3]>; - def T4_64 : Mips64GPRReg< 12, "12", [T4]>; - def T5_64 : Mips64GPRReg< 13, "13", [T5]>; - def T6_64 : Mips64GPRReg< 14, "14", [T6]>; - def T7_64 : Mips64GPRReg< 15, "15", [T7]>; - def S0_64 : Mips64GPRReg< 16, "16", [S0]>; - def S1_64 : Mips64GPRReg< 17, "17", [S1]>; - def S2_64 : Mips64GPRReg< 18, "18", [S2]>; - def S3_64 : Mips64GPRReg< 19, "19", [S3]>; - def S4_64 : Mips64GPRReg< 20, "20", [S4]>; - def S5_64 : Mips64GPRReg< 21, "21", [S5]>; - def S6_64 : Mips64GPRReg< 22, "22", [S6]>; - def S7_64 : Mips64GPRReg< 23, "23", [S7]>; - def T8_64 : Mips64GPRReg< 24, "24", [T8]>; - def T9_64 : Mips64GPRReg< 25, "25", [T9]>; - def K0_64 : Mips64GPRReg< 26, "26", [K0]>; - def K1_64 : Mips64GPRReg< 27, "27", [K1]>; - def GP_64 : Mips64GPRReg< 28, "GP", [GP]>; - def SP_64 : Mips64GPRReg< 29, "SP", [SP]>; - def FP_64 : Mips64GPRReg< 30, "FP", [FP]>; - def RA_64 : Mips64GPRReg< 31, "RA", [RA]>; + def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>, DwarfRegNum<[0]>; + def AT_64 : Mips64GPRReg< 1, "AT", [AT]>, DwarfRegNum<[1]>; + def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>; + def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>; + def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>; + def A1_64 : Mips64GPRReg< 5, "5", [A1]>, DwarfRegNum<[5]>; + def A2_64 : Mips64GPRReg< 6, "6", [A2]>, DwarfRegNum<[6]>; + def A3_64 : Mips64GPRReg< 7, "7", [A3]>, DwarfRegNum<[7]>; + def T0_64 : Mips64GPRReg< 8, "8", [T0]>, DwarfRegNum<[8]>; + def T1_64 : Mips64GPRReg< 9, "9", [T1]>, DwarfRegNum<[9]>; + def T2_64 : Mips64GPRReg< 10, "10", [T2]>, DwarfRegNum<[10]>; + def T3_64 : 
Mips64GPRReg< 11, "11", [T3]>, DwarfRegNum<[11]>; + def T4_64 : Mips64GPRReg< 12, "12", [T4]>, DwarfRegNum<[12]>; + def T5_64 : Mips64GPRReg< 13, "13", [T5]>, DwarfRegNum<[13]>; + def T6_64 : Mips64GPRReg< 14, "14", [T6]>, DwarfRegNum<[14]>; + def T7_64 : Mips64GPRReg< 15, "15", [T7]>, DwarfRegNum<[15]>; + def S0_64 : Mips64GPRReg< 16, "16", [S0]>, DwarfRegNum<[16]>; + def S1_64 : Mips64GPRReg< 17, "17", [S1]>, DwarfRegNum<[17]>; + def S2_64 : Mips64GPRReg< 18, "18", [S2]>, DwarfRegNum<[18]>; + def S3_64 : Mips64GPRReg< 19, "19", [S3]>, DwarfRegNum<[19]>; + def S4_64 : Mips64GPRReg< 20, "20", [S4]>, DwarfRegNum<[20]>; + def S5_64 : Mips64GPRReg< 21, "21", [S5]>, DwarfRegNum<[21]>; + def S6_64 : Mips64GPRReg< 22, "22", [S6]>, DwarfRegNum<[22]>; + def S7_64 : Mips64GPRReg< 23, "23", [S7]>, DwarfRegNum<[23]>; + def T8_64 : Mips64GPRReg< 24, "24", [T8]>, DwarfRegNum<[24]>; + def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>; + def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>; + def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>; + def GP_64 : Mips64GPRReg< 28, "GP", [GP]>, DwarfRegNum<[28]>; + def SP_64 : Mips64GPRReg< 29, "SP", [SP]>, DwarfRegNum<[29]>; + def FP_64 : Mips64GPRReg< 30, "FP", [FP]>, DwarfRegNum<[30]>; + def RA_64 : Mips64GPRReg< 31, "RA", [RA]>, DwarfRegNum<[31]>; /// Mips Single point precision FPU Registers def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>; @@ -192,38 +191,38 @@ let Namespace = "Mips" in { def D15 : AFPR<30, "F30", [F30, F31]>; /// Mips Double point precision FPU Registers in MFP64 mode. - def D0_64 : AFPR64<0, "F0", [F0]>; - def D1_64 : AFPR64<1, "F1", [F1]>; - def D2_64 : AFPR64<2, "F2", [F2]>; - def D3_64 : AFPR64<3, "F3", [F3]>; - def D4_64 : AFPR64<4, "F4", [F4]>; - def D5_64 : AFPR64<5, "F5", [F5]>; - def D6_64 : AFPR64<6, "F6", [F6]>; - def D7_64 : AFPR64<7, "F7", [F7]>; - def D8_64 : AFPR64<8, "F8", [F8]>; - def D9_64 : AFPR64<9, "F9", [F9]>; - def D10_64 : AFPR64<10, "F10", [F10]>; - def D11_64 : AFPR64<11, "F11", [F11]>; - def D12_64 : AFPR64<12, "F12", [F12]>; - def D13_64 : AFPR64<13, "F13", [F13]>; - def D14_64 : AFPR64<14, "F14", [F14]>; - def D15_64 : AFPR64<15, "F15", [F15]>; - def D16_64 : AFPR64<16, "F16", [F16]>; - def D17_64 : AFPR64<17, "F17", [F17]>; - def D18_64 : AFPR64<18, "F18", [F18]>; - def D19_64 : AFPR64<19, "F19", [F19]>; - def D20_64 : AFPR64<20, "F20", [F20]>; - def D21_64 : AFPR64<21, "F21", [F21]>; - def D22_64 : AFPR64<22, "F22", [F22]>; - def D23_64 : AFPR64<23, "F23", [F23]>; - def D24_64 : AFPR64<24, "F24", [F24]>; - def D25_64 : AFPR64<25, "F25", [F25]>; - def D26_64 : AFPR64<26, "F26", [F26]>; - def D27_64 : AFPR64<27, "F27", [F27]>; - def D28_64 : AFPR64<28, "F28", [F28]>; - def D29_64 : AFPR64<29, "F29", [F29]>; - def D30_64 : AFPR64<30, "F30", [F30]>; - def D31_64 : AFPR64<31, "F31", [F31]>; + def D0_64 : AFPR64<0, "F0", [F0]>, DwarfRegNum<[32]>; + def D1_64 : AFPR64<1, "F1", [F1]>, DwarfRegNum<[33]>; + def D2_64 : AFPR64<2, "F2", [F2]>, DwarfRegNum<[34]>; + def D3_64 : AFPR64<3, "F3", [F3]>, DwarfRegNum<[35]>; + def D4_64 : AFPR64<4, "F4", [F4]>, DwarfRegNum<[36]>; + def D5_64 : AFPR64<5, "F5", [F5]>, DwarfRegNum<[37]>; + def D6_64 : AFPR64<6, "F6", [F6]>, DwarfRegNum<[38]>; + def D7_64 : AFPR64<7, "F7", [F7]>, DwarfRegNum<[39]>; + def D8_64 : AFPR64<8, "F8", [F8]>, DwarfRegNum<[40]>; + def D9_64 : AFPR64<9, "F9", [F9]>, DwarfRegNum<[41]>; + def D10_64 : AFPR64<10, "F10", [F10]>, DwarfRegNum<[42]>; + def D11_64 : AFPR64<11, "F11", [F11]>, DwarfRegNum<[43]>; + def D12_64 : AFPR64<12, "F12", 
[F12]>, DwarfRegNum<[44]>; + def D13_64 : AFPR64<13, "F13", [F13]>, DwarfRegNum<[45]>; + def D14_64 : AFPR64<14, "F14", [F14]>, DwarfRegNum<[46]>; + def D15_64 : AFPR64<15, "F15", [F15]>, DwarfRegNum<[47]>; + def D16_64 : AFPR64<16, "F16", [F16]>, DwarfRegNum<[48]>; + def D17_64 : AFPR64<17, "F17", [F17]>, DwarfRegNum<[49]>; + def D18_64 : AFPR64<18, "F18", [F18]>, DwarfRegNum<[50]>; + def D19_64 : AFPR64<19, "F19", [F19]>, DwarfRegNum<[51]>; + def D20_64 : AFPR64<20, "F20", [F20]>, DwarfRegNum<[52]>; + def D21_64 : AFPR64<21, "F21", [F21]>, DwarfRegNum<[53]>; + def D22_64 : AFPR64<22, "F22", [F22]>, DwarfRegNum<[54]>; + def D23_64 : AFPR64<23, "F23", [F23]>, DwarfRegNum<[55]>; + def D24_64 : AFPR64<24, "F24", [F24]>, DwarfRegNum<[56]>; + def D25_64 : AFPR64<25, "F25", [F25]>, DwarfRegNum<[57]>; + def D26_64 : AFPR64<26, "F26", [F26]>, DwarfRegNum<[58]>; + def D27_64 : AFPR64<27, "F27", [F27]>, DwarfRegNum<[59]>; + def D28_64 : AFPR64<28, "F28", [F28]>, DwarfRegNum<[60]>; + def D29_64 : AFPR64<29, "F29", [F29]>, DwarfRegNum<[61]>; + def D30_64 : AFPR64<30, "F30", [F30]>, DwarfRegNum<[62]>; + def D31_64 : AFPR64<31, "F31", [F31]>, DwarfRegNum<[63]>; // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h index 66d1bfd..0787ed3 100644 --- a/lib/Target/Mips/MipsRelocations.h +++ b/lib/Target/Mips/MipsRelocations.h @@ -1,16 +1,16 @@ -//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===// +//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // // This file defines the Mips target-specific relocation types // (for relocation-model=static). // -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// #ifndef MIPSRELOCATIONS_H_ #define MIPSRELOCATIONS_H_ @@ -20,10 +20,10 @@ namespace llvm { namespace Mips{ enum RelocationType { - // reloc_mips_branch - pc relative relocation for branches. The lower 18 + // reloc_mips_pc16 - pc relative relocation for branches. The lower 18 // bits of the difference between the branch target and the branch // instruction, shifted right by 2. - reloc_mips_branch = 1, + reloc_mips_pc16 = 1, // reloc_mips_hi - upper 16 bits of the address (modified by +1 if the // lower 16 bits of the address is negative). 
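The renamed reloc_mips_pc16 (applied in MipsJITInfo.cpp above and documented in MipsRelocations.h here) stores a branch displacement relative to the instruction after the branch, in units of words; the -4 accounts for the delay-slot-adjusted PC. A standalone round-trip of that encoding, mirroring the relocate() arithmetic (sketch; encodePC16/decodePC16 are illustrative names):

    #include <cassert>
    #include <cstdint>

    // Encode: ((Target - BranchAddr) - 4) >> 2, low 16 bits kept, as in
    // MipsJITInfo::relocate for Mips::reloc_mips_pc16.
    static uint32_t encodePC16(int64_t Target, int64_t BranchAddr) {
      return (uint32_t)(((Target - BranchAddr) - 4) >> 2) & 0xffff;
    }

    // Decode: what the hardware does when the branch is taken -- the field
    // is sign-extended, scaled by 4, and added to the delay-slot address.
    static int64_t decodePC16(uint32_t Field, int64_t BranchAddr) {
      return BranchAddr + 4 + (int64_t)(int16_t)Field * 4;
    }

    int main() {
      int64_t Branch = 0x400100, Target = 0x400080; // a backward branch
      assert(decodePC16(encodePC16(Target, Branch), Branch) == Target);
      return 0;
    }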
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 00be8ee..1add02f 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -1,4 +1,4 @@ -//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===// +//===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index dc299f2..d4a50ee 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -1,4 +1,4 @@ -//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===// +//===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,10 +21,12 @@ using namespace llvm; +void MipsSubtarget::anchor() { } + MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little) : MipsGenSubtargetInfo(TT, CPU, FS), - MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), + MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false) @@ -41,7 +43,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Set MipsABI if it hasn't been set yet. if (MipsABI == UnknownABI) - MipsABI = hasMips64() ? N64 : O32; + MipsABI = hasMips64() ? N64 : O32; // Check if Architecture and ABI are compatible. assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) || diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index d9dddad..ba0bbac 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -1,4 +1,4 @@ -//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====// +//===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,6 +25,7 @@ namespace llvm { class StringRef; class MipsSubtarget : public MipsGenSubtargetInfo { + virtual void anchor(); public: // NOTE: O64 will not be supported. 
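The `virtual void anchor();` members added throughout this commit (MipsFunctionInfo, MipsSubtarget, each *TargetMachine, PTXMCAsmInfo below) all apply one idiom: give the class a single out-of-line virtual function so the compiler emits its vtable once, in that function's object file, instead of as a weak copy in every translation unit that uses the class. A minimal illustration with a hypothetical class (not from the patch):

    // Widget.h
    class Widget {
      virtual void anchor(); // sole out-of-line virtual: the "key function"
    public:
      virtual ~Widget() {}   // inline virtuals alone would leave the vtable
                             // weakly duplicated wherever Widget.h is included
    };

    // Widget.cpp
    void Widget::anchor() {} // intentionally empty; pins vtable emission here
                             // (also silences -Wweak-vtables)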
@@ -111,6 +112,8 @@ public: bool hasMips64() const { return MipsArchVersion >= Mips64; } bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } + bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); } + bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } bool isGP64bit() const { return IsGP64bit; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 02887fa..8806aaf 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -14,6 +14,7 @@ #include "Mips.h" #include "MipsTargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -52,6 +53,8 @@ MipsTargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), JITInfo() { } +void MipsebTargetMachine::anchor() { } + MipsebTargetMachine:: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -59,6 +62,8 @@ MipsebTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} +void MipselTargetMachine::anchor() { } + MipselTargetMachine:: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -66,6 +71,8 @@ MipselTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} +void Mips64ebTargetMachine::anchor() { } + Mips64ebTargetMachine:: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -73,6 +80,8 @@ Mips64ebTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} +void Mips64elTargetMachine::anchor() { } + Mips64elTargetMachine:: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -80,37 +89,59 @@ Mips64elTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} +namespace { +/// Mips Code Generator Pass Configuration Options. +class MipsPassConfig : public TargetPassConfig { +public: + MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + MipsTargetMachine &getMipsTargetMachine() const { + return getTM<MipsTargetMachine>(); + } + + const MipsSubtarget &getMipsSubtarget() const { + return *getMipsTargetMachine().getSubtargetImpl(); + } + + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { + return new MipsPassConfig(this, PM); +} + // Install an instruction selector pass using // the ISelDag to gen Mips code. -bool MipsTargetMachine:: -addInstSelector(PassManagerBase &PM) +bool MipsPassConfig::addInstSelector() { - PM.add(createMipsISelDag(*this)); + PM.add(createMipsISelDag(getMipsTargetMachine())); return false; } // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. 
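MipsPassConfig above is the Mips side of a tree-wide migration: codegen pass hooks move off the TargetMachine onto a TargetPassConfig object (from llvm/CodeGen/Passes.h) returned by createPassConfig(). The shape generalizes directly; a skeleton for a hypothetical target under the same API (sketch, every Foo* name is invented):

    namespace {
    class FooPassConfig : public TargetPassConfig {
    public:
      FooPassConfig(FooTargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

      FooTargetMachine &getFooTargetMachine() const {
        return getTM<FooTargetMachine>(); // recover the concrete machine
      }

      virtual bool addInstSelector() {
        PM.add(createFooISelDag(getFooTargetMachine()));
        return false;
      }
    };
    } // namespace

    TargetPassConfig *FooTargetMachine::createPassConfig(PassManagerBase &PM) {
      return new FooPassConfig(this, PM);
    }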
-bool MipsTargetMachine:: -addPreEmitPass(PassManagerBase &PM) +bool MipsPassConfig::addPreEmitPass() { - PM.add(createMipsDelaySlotFillerPass(*this)); + PM.add(createMipsDelaySlotFillerPass(getMipsTargetMachine())); return true; } -bool MipsTargetMachine:: -addPreRegAlloc(PassManagerBase &PM) { +bool MipsPassConfig::addPreRegAlloc() { // Do not restore $gp if target is Mips64. // In N32/64, $gp is a callee-saved register. - if (!Subtarget.hasMips64()) - PM.add(createMipsEmitGPRestorePass(*this)); + if (!getMipsSubtarget().hasMips64()) + PM.add(createMipsEmitGPRestorePass(getMipsTargetMachine())); return true; } -bool MipsTargetMachine:: -addPostRegAlloc(PassManagerBase &PM) { - PM.add(createMipsExpandPseudoPass(*this)); +bool MipsPassConfig::addPreSched2() { + PM.add(createMipsExpandPseudoPass(getMipsTargetMachine())); return true; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 6842373..19ae142 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -1,4 +1,4 @@ -//===-- MipsTargetMachine.h - Define TargetMachine for Mips -00--*- C++ -*-===// +//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -68,10 +68,7 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); - virtual bool addPreRegAlloc(PassManagerBase &PM); - virtual bool addPostRegAlloc(PassManagerBase &); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); @@ -80,6 +77,7 @@ namespace llvm { /// MipsebTargetMachine - Mips32 big endian target machine. /// class MipsebTargetMachine : public MipsTargetMachine { + virtual void anchor(); public: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -90,6 +88,7 @@ public: /// MipselTargetMachine - Mips32 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { + virtual void anchor(); public: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -100,6 +99,7 @@ public: /// Mips64ebTargetMachine - Mips64 big endian target machine. /// class Mips64ebTargetMachine : public MipsTargetMachine { + virtual void anchor(); public: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -111,6 +111,7 @@ public: /// Mips64elTargetMachine - Mips64 little endian target machine. 
/// class Mips64elTargetMachine : public MipsTargetMachine { + virtual void anchor(); public: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 05c46f5..04dc60a 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===// +//===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index a9f4330..a3be342 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_target(PTXCodeGen PTXMCAsmStreamer.cpp PTXMCInstLower.cpp PTXMFInfoExtract.cpp + PTXMachineFunctionInfo.cpp PTXParamManager.cpp PTXRegAlloc.cpp PTXRegisterInfo.cpp diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index 5fecb85..ec7e2a7 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -27,8 +27,9 @@ using namespace llvm; #include "PTXGenAsmWriter.inc" PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI) { + MCInstPrinter(MAI, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } @@ -215,7 +216,6 @@ void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo, llvm_unreachable("Unknown rounding mode!"); case PTXRoundingMode::RndDefault: llvm_unreachable("FP rounding-mode pass did not handle instruction!"); - break; case PTXRoundingMode::RndNone: // Do not print anything. 
break; diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h index 86dfd48..eef6101 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -1,4 +1,4 @@ -//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===// +//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,7 +23,8 @@ class MCOperand; class PTXInstPrinter : public MCInstPrinter { public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); + PTXInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp index efefead..cdfbc80 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp @@ -16,6 +16,8 @@ using namespace llvm; +void PTXMCAsmInfo::anchor() { } + PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); if (TheTriple.getArch() == Triple::ptx64) diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h index 03f5d66..32ca069 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====// +//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -20,7 +20,9 @@ namespace llvm { class Target; class StringRef; - struct PTXMCAsmInfo : public MCAsmInfo { + class PTXMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit PTXMCAsmInfo(const Target &T, const StringRef &TT); }; } // namespace llvm diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 09f86b5..7671b11 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===// +//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -62,9 +62,10 @@ static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createPTXMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, STI); + return new PTXInstPrinter(MAI, MRI, STI); } extern "C" void LLVMInitializePTXTargetMC() { diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 693bb9c..994a68e 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -1,4 +1,4 @@ -//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==// +//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 77ed71d..58ac5f2 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -79,7 +79,6 @@ static const char 
*getStateSpaceName(unsigned addressSpace) { case PTXStateSpace::Parameter: return "param"; case PTXStateSpace::Shared: return "shared"; } - return NULL; } static const char *getTypeName(Type* type) { @@ -358,11 +357,9 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { void PTXAsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. - if (!CurrentFnSym->isUndefined()) { + if (!CurrentFnSym->isUndefined()) report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); - return; - } const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); const PTXParamManager &PM = MFI->getParamManager(); diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h index d5ea4db..74c8d58 100644 --- a/lib/Target/PTX/PTXAsmPrinter.h +++ b/lib/Target/PTX/PTXAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===// +//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp index b621b9d..e6e268e 100644 --- a/lib/Target/PTX/PTXFrameLowering.cpp +++ b/lib/Target/PTX/PTXFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====// +//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h index 9320676..831e818 100644 --- a/lib/Target/PTX/PTXFrameLowering.h +++ b/lib/Target/PTX/PTXFrameLowering.h @@ -1,4 +1,4 @@ -//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===// +//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index a012297..e5d4edc 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -211,7 +211,6 @@ SDValue PTXTargetLowering:: switch (CallConv) { default: llvm_unreachable("Unsupported calling convention"); - break; case CallingConv::PTX_Kernel: MFI->setKernel(true); break; @@ -241,32 +240,25 @@ SDValue PTXTargetLowering:: } else { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT RegVT = Ins[i].VT; - TargetRegisterClass* TRC = getRegClassFor(RegVT); - unsigned RegType; + EVT RegVT = Ins[i].VT; + const TargetRegisterClass* TRC = getRegClassFor(RegVT); + unsigned RegType; // Determine which register class we need - if (RegVT == MVT::i1) { + if (RegVT == MVT::i1) RegType = PTXRegisterType::Pred; - } - else if (RegVT == MVT::i16) { + else if (RegVT == MVT::i16) RegType = PTXRegisterType::B16; - } - else if (RegVT == MVT::i32) { + else if (RegVT == MVT::i32) RegType = PTXRegisterType::B32; - } - else if (RegVT == MVT::i64) { + else if (RegVT == MVT::i64) RegType = PTXRegisterType::B64; - } - else if (RegVT == MVT::f32) { + else if (RegVT == MVT::f32) RegType = PTXRegisterType::F32; - } - else if (RegVT == MVT::f64) { + else if (RegVT == MVT::f64) RegType = PTXRegisterType::F64; - } - else { + else llvm_unreachable("Unknown parameter type"); - } // Use a unique index in the instruction to prevent instruction folding. // Yes, this is a hack. 
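getStateSpaceName above shows the pattern behind another recurring change in this commit: `assert(0 && ...)` plus a dead return or break becomes `llvm_unreachable(...)`. assert compiles to nothing under NDEBUG, so the dead code was needed to keep release builds warning-free; llvm_unreachable is noreturn in every configuration, so it can end the function by itself. A condensed before/after (sketch; assumes llvm/Support/ErrorHandling.h):

    #include <cassert>
    #include "llvm/Support/ErrorHandling.h"

    // Before: the assert vanishes in release builds, so a dead return had
    // to follow the covered switch to keep the compiler quiet.
    static const char *nameBefore(unsigned K) {
      switch (K) {
      case 0: return "global";
      case 1: return "local";
      default: assert(0 && "unknown kind"); break;
      }
      return 0; // unreachable in practice, required all the same
    }

    // After: llvm_unreachable is noreturn in all build modes, so both the
    // dead return and the trailing break simply disappear.
    static const char *nameAfter(unsigned K) {
      switch (K) {
      default: llvm_unreachable("unknown kind");
      case 0: return "global";
      case 1: return "local";
      }
    }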
@@ -327,7 +319,7 @@ SDValue PTXTargetLowering:: } else { for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT RegVT = Outs[i].VT; - TargetRegisterClass* TRC = 0; + const TargetRegisterClass* TRC; unsigned RegType; // Determine which register class we need @@ -381,7 +373,7 @@ SDValue PTXTargetLowering:: SDValue PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index 4d25665..fd20982 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -1,4 +1,4 @@ -//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==// +//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -64,9 +64,8 @@ class PTXTargetLowering : public TargetLowering { SelectionDAG &DAG) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td index 397fdc3..267e834 100644 --- a/lib/Target/PTX/PTXInstrFormats.td +++ b/lib/Target/PTX/PTXInstrFormats.td @@ -1,4 +1,4 @@ -//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===// +//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 871b3a7..9d6cbf1 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===// +//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -300,7 +300,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "storeRegToStackSlot should not be called for PTX"); + llvm_unreachable("storeRegToStackSlot should not be called for PTX"); } void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -308,7 +308,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert(false && "loadRegFromStackSlot should not be called for PTX"); + llvm_unreachable("loadRegFromStackSlot should not be called for PTX"); } // static helper routines diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index 871f1ac..fba89c0 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -1,4 +1,4 @@ -//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===// +//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 
19a862f..818d444 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -1,4 +1,4 @@ -//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===// +//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td index 9b4f56c..7a62684 100644 --- a/lib/Target/PTX/PTXInstrLoadStore.td +++ b/lib/Target/PTX/PTXInstrLoadStore.td @@ -1,4 +1,4 @@ -//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===// +//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 9de1cb6..3416f1c 100644 --- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -1,4 +1,4 @@ -//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===// +//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp index bc7aaa3..3ed67a6 100644 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===// +//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===// // // The LLVM Compiler Infrastructure // @@ -162,7 +162,7 @@ public: virtual void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit = 0); - virtual void EmitValueToOffset(const MCExpr *Offset, + virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); virtual void EmitFileDirective(StringRef Filename); @@ -176,7 +176,7 @@ public: /// indicated by the hasRawTextSupport() predicate. 
virtual void EmitRawText(StringRef String); - virtual void Finish(); + virtual void FinishImpl(); /// @} @@ -478,8 +478,8 @@ void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) {} -void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, - unsigned char Value) {} +bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) {return false;} void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) { @@ -540,7 +540,7 @@ void PTXMCAsmStreamer::EmitRawText(StringRef String) { EmitEOL(); } -void PTXMCAsmStreamer::Finish() {} +void PTXMCAsmStreamer::FinishImpl() {} namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Context, diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index 26ec623..172a0e0 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -71,6 +71,8 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { RegType = PTXRegisterType::F32; else if (TRC == PTX::RegF64RegisterClass) RegType = PTXRegisterType::F64; + else + llvm_unreachable("Unknown register class."); MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg); } diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/PTX/PTXMachineFunctionInfo.cpp new file mode 100644 index 0000000..60acfc7 --- /dev/null +++ b/lib/Target/PTX/PTXMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- PTXMachineFunctionInfo.cpp - PTX machine function info ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PTXMachineFunctionInfo.h" + +using namespace llvm; + +void PTXMachineFunctionInfo::anchor() { } diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 1a2878c..bb7574c 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==// +//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -30,7 +30,7 @@ namespace llvm { /// contains private PTX target-specific information for each MachineFunction.
/// class PTXMachineFunctionInfo : public MachineFunctionInfo { -private: + virtual void anchor(); bool IsKernel; DenseSet<unsigned> RegArgs; DenseSet<unsigned> RegRets; diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp index 7753787..74538e6 100644 --- a/lib/Target/PTX/PTXParamManager.cpp +++ b/lib/Target/PTX/PTXParamManager.cpp @@ -1,4 +1,4 @@ -//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===// +//===-- PTXParamManager.cpp - Manager for .param variables ----------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h index 9fd2de5..32342f7 100644 --- a/lib/Target/PTX/PTXParamManager.h +++ b/lib/Target/PTX/PTXParamManager.h @@ -1,4 +1,4 @@ -//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==// +//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp index 2d2d5c3..7fd5375 100644 --- a/lib/Target/PTX/PTXRegAlloc.cpp +++ b/lib/Target/PTX/PTXRegAlloc.cpp @@ -24,10 +24,7 @@ namespace { class PTXRegAlloc : public MachineFunctionPass { public: static char ID; - PTXRegAlloc() : MachineFunctionPass(ID) { - initializePHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); - } + PTXRegAlloc() : MachineFunctionPass(ID) {} virtual const char* getPassName() const { return "PTX Register Allocator"; @@ -35,8 +32,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index c806266..3f087cd 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===// +//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -31,44 +31,8 @@ PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, : PTXGenRegisterInfo(0), TII(tii) { } -void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - RegScavenger *RS) const { - unsigned Index; - MachineInstr &MI = *II; - //MachineBasicBlock &MBB = *MI.getParent(); - //DebugLoc dl = MI.getDebugLoc(); - //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); - - //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass); - +void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/, + int /*SPAdj*/, + RegScavenger * /*RS*/) const { llvm_unreachable("FrameIndex should have been previously eliminated!"); - - Index = 0; - while (!MI.getOperand(Index).isFI()) { - ++Index; - assert(Index < MI.getNumOperands() && - "Instr does not have a FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(Index).getIndex(); - - DEBUG(dbgs() << "eliminateFrameIndex: " << MI); - DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); - DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); - - //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32)) - //.addReg(Reg, RegState::Define).addImm(FrameIndex); - //if (MI2->findFirstPredOperandIdx() == -1) { - // 
MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); - //} - //MI2->dump(); - - //MachineOperand ESOp = MachineOperand::CreateES("__local__"); - - // This frame index is post stack slot re-use assignments - //MI.getOperand(Index).ChangeToRegister(Reg, false); - MI.getOperand(Index).ChangeToImmediate(FrameIndex); - //MI.getOperand(Index) = ESOp; } diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index 55fafe4..5614ce7 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -1,4 +1,4 @@ -//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===// +//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -32,9 +32,9 @@ public: PTXRegisterInfo(PTXTargetMachine &TM, const TargetInstrInfo &tii); - virtual const unsigned + virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const { - static const unsigned CalleeSavedRegs[] = { 0 }; + static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; // save nothing } @@ -49,7 +49,6 @@ public: virtual unsigned getFrameRegister(const MachineFunction &MF) const { llvm_unreachable("PTX does not have a frame register"); - return 0; } }; // struct PTXRegisterInfo } // namespace llvm diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 6ed6d3f..e8b262e 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -1,5 +1,4 @@ - -//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index 1eb57d2..454f64e 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -1,4 +1,4 @@ -//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=// +//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -22,6 +22,8 @@ using namespace llvm; +void PTXSubtarget::anchor() { } + PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : PTXGenSubtargetInfo(TT, CPU, FS), @@ -57,10 +59,10 @@ std::string PTXSubtarget::getTargetString() const { std::string PTXSubtarget::getPTXVersionString() const { switch(PTXVersion) { - default: llvm_unreachable("Unknown PTX version"); case PTX_VERSION_2_0: return "2.0"; case PTX_VERSION_2_1: return "2.1"; case PTX_VERSION_2_2: return "2.2"; case PTX_VERSION_2_3: return "2.3"; } + llvm_unreachable("Invalid PTX version"); } diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index b946d7c..ce93fef 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -1,4 +1,4 @@ -//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===// +//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,7 @@ namespace llvm { class StringRef; class PTXSubtarget : public PTXGenSubtargetInfo { + virtual void anchor(); public: /** diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 4efdc27..9305377 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ 
b/lib/Target/PTX/PTXTargetMachine.cpp @@ -85,6 +85,8 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, TLInfo(*this) { } +void PTX32TargetMachine::anchor() { } + PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -93,6 +95,8 @@ PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } +void PTX64TargetMachine::anchor() { } + PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -101,261 +105,63 @@ PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } -bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createPTXISelDag(*this, getOptLevel())); - return false; +namespace llvm { +/// PTX Code Generator Pass Configuration Options. +class PTXPassConfig : public TargetPassConfig { +public: + PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + PTXTargetMachine &getPTXTargetMachine() const { + return getTM<PTXTargetMachine>(); + } + + bool addInstSelector(); + FunctionPass *createTargetRegisterAllocator(bool); + void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + bool addPostRegAlloc(); + void addMachineLateOptimization(); + bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) { + PTXPassConfig *PassConfig = new PTXPassConfig(this, PM); + PassConfig->disablePass(PrologEpilogCodeInserterID); + return PassConfig; } -bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM) { - // PTXMFInfoExtract must after register allocation! - //PM.add(createPTXMFInfoExtract(*this)); +bool PTXPassConfig::addInstSelector() { + PM.add(createPTXISelDag(getPTXTargetMachine(), getOptLevel())); return false; } -bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify) { - // This is mostly based on LLVMTargetMachine::addPassesToEmitFile - - // Add common CodeGen passes. - MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, DisableVerify, Context)) - return true; - assert(Context != 0 && "Failed to get MCContext"); - - if (hasMCSaveTempLabels()) - Context->setAllowTemporaryLabels(false); - - const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - OwningPtr<MCStreamer> AsmStreamer; - - switch (FileType) { - default: return true; - case CGFT_AssemblyFile: { - MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); - - // Create a code emitter if asked to show the encoding. - MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - - MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, - true, /* verbose asm */ - hasMCUseLoc(), - hasMCUseCFI(), - hasMCUseDwarfDirectory(), - InstPrinter, - MCE, MAB, - false /* show MC encoding */); - AsmStreamer.reset(S); - break; - } - case CGFT_ObjectFile: { - llvm_unreachable("Object file emission is not supported with PTX"); - } - case CGFT_Null: - // The Null output is intended for use for performance analysis and testing, - // not real users. 
- AsmStreamer.reset(createNullStreamer(*Context)); - break; - } - - // MC Logging - //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); - - // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. - FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) - return true; - - // If successful, createAsmPrinter took ownership of AsmStreamer. - AsmStreamer.take(); +FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) { + return createPTXRegisterAllocator(); } - PM.add(Printer); +// Modify the optimized compilation path to bypass optimized register allocation. +void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addFastRegAlloc(RegAllocPass); } - PM.add(createGCInfoDeleter()); +bool PTXPassConfig::addPostRegAlloc() { + // PTXMFInfoExtract must run after register allocation! + //PM.add(createPTXMFInfoExtract(getPTXTargetMachine())); return false; } -bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - bool DisableVerify, - MCContext *&OutContext) { - // Add standard LLVM codegen passes. - // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some - // modifications for the PTX target. - - // Standard LLVM-Level Passes. - - // Basic AliasAnalysis support. - // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that - // BasicAliasAnalysis wins if they disagree. This is intended to help - // support "obvious" type-punning idioms. - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createBasicAliasAnalysisPass()); - - // Before running any passes, run the verifier to determine if the input - // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Run loop strength reduction before anything else. - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createLoopStrengthReducePass(getTargetLowering())); - //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); - } - - PM.add(createGCLoweringPass()); - - // Make sure that no unreachable blocks are instruction selected. - PM.add(createUnreachableBlockEliminationPass()); - - PM.add(createLowerInvokePass(getTargetLowering())); - // The lower invoke pass may create unreachable code. Remove it. - PM.add(createUnreachableBlockEliminationPass()); - - if (getOptLevel() != CodeGenOpt::None) - PM.add(createCodeGenPreparePass(getTargetLowering())); - - PM.add(createStackProtectorPass(getTargetLowering())); - - addPreISel(PM); - - //PM.add(createPrintFunctionPass("\n\n" - // "*** Final LLVM Code input to ISel ***\n", - // &dbgs())); - - // All passes which modify the LLVM IR are now complete; run the verifier - // to ensure that the IR is valid. - if (!DisableVerify) - PM.add(createVerifierPass()); - - // Standard Lower-Level Passes. +/// Add passes that optimize machine instructions after register allocation. +void PTXPassConfig::addMachineLateOptimization() { + if (addPass(BranchFolderPassID) != &NoPassID) + printNoVerify("After BranchFolding"); - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), - *getRegisterInfo(), - &getTargetLowering()->getObjFileLowering()); - PM.add(MMI); - OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. - - // Set up a MachineFunction for the rest of CodeGen to work on. 
- PM.add(new MachineFunctionAnalysis(*this)); - - // Ask the target for an isel. - if (addInstSelector(PM)) - return true; - - // Print the instruction selected machine code... - printAndVerify(PM, "After Instruction Selection"); - - // Expand pseudo-instructions emitted by ISel. - PM.add(createExpandISelPseudosPass()); - - // Pre-ra tail duplication. - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createTailDuplicatePass(true)); - printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); - } - - // Optimize PHIs before DCE: removing dead PHI cycles may make more - // instructions dead. - if (getOptLevel() != CodeGenOpt::None) - PM.add(createOptimizePHIsPass()); - - // If the target requests it, assign local variables to stack slots relative - // to one another and simplify frame index references where possible. - PM.add(createLocalStackSlotAllocationPass()); - - if (getOptLevel() != CodeGenOpt::None) { - // With optimization, dead code should already be eliminated. However - // there is one known exception: lowered code for arguments that are only - // used by tail calls, where the tail calls reuse the incoming stack - // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass"); - - PM.add(createMachineLICMPass()); - PM.add(createMachineCSEPass()); - PM.add(createMachineSinkingPass()); - printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); - - PM.add(createPeepholeOptimizerPass()); - printAndVerify(PM, "After codegen peephole optimization pass"); - } - - // Run pre-ra passes. - if (addPreRegAlloc(PM)) - printAndVerify(PM, "After PreRegAlloc passes"); - - // Perform register allocation. - PM.add(createPTXRegisterAllocator()); - printAndVerify(PM, "After Register Allocation"); - - // Perform stack slot coloring and post-ra machine LICM. - if (getOptLevel() != CodeGenOpt::None) { - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - PM.add(createStackSlotColoringPass(false)); - - // FIXME: Post-RA LICM has asserts that fire on virtual registers. - // Run post-ra machine LICM to hoist reloads / remats. - //if (!DisablePostRAMachineLICM) - // PM.add(createMachineLICMPass(false)); - - printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); - } - - // Run post-ra passes. - if (addPostRegAlloc(PM)) - printAndVerify(PM, "After PostRegAlloc passes"); - - PM.add(createExpandPostRAPseudosPass()); - printAndVerify(PM, "After ExpandPostRAPseudos"); - - // Insert prolog/epilog code. Eliminate abstract frame index references... - PM.add(createPrologEpilogCodeInserter()); - printAndVerify(PM, "After PrologEpilogCodeInserter"); - - // Run pre-sched2 passes. - if (addPreSched2(PM)) - printAndVerify(PM, "After PreSched2 passes"); - - // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createPostRAScheduler(getOptLevel())); - printAndVerify(PM, "After PostRAScheduler"); - } - - // Branch folding must be run after regalloc and prolog/epilog insertion. - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printNoVerify(PM, "After BranchFolding"); - } - - // Tail duplication. 
- if (getOptLevel() != CodeGenOpt::None) { - PM.add(createTailDuplicatePass(false)); - printNoVerify(PM, "After TailDuplicate"); - } - - PM.add(createGCMachineCodeAnalysisPass()); - - //if (PrintGCInfo) - // PM.add(createGCInfoPrinter(dbgs())); - - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); - } - - if (addPreEmitPass(PM)) - printNoVerify(PM, "After PreEmit passes"); - - PM.add(createPTXMFInfoExtract(*this, getOptLevel())); - PM.add(createPTXFPRoundingModePass(*this, getOptLevel())); + if (addPass(TailDuplicateID) != &NoPassID) + printNoVerify("After TailDuplicate"); +} - return false; +bool PTXPassConfig::addPreEmitPass() { + PM.add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel())); + PM.add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel())); + return true; } diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 22911f7..278d155 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -59,15 +59,6 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPostRegAlloc(PassManagerBase &PM); - - // We override this method to supply our own set of codegen passes. - virtual bool addPassesToEmitFile(PassManagerBase &, - formatted_raw_ostream &, - CodeGenFileType, - bool = true); - // Emission of machine code through JITCodeEmitter is not supported. virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, @@ -83,14 +74,13 @@ class PTXTargetMachine : public LLVMTargetMachine { return true; } - private: - - bool addCommonCodeGenPasses(PassManagerBase &, - bool DisableVerify, MCContext *&OutCtx); + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // class PTXTargetMachine class PTX32TargetMachine : public PTXTargetMachine { + virtual void anchor(); public: PTX32TargetMachine(const Target &T, StringRef TT, @@ -100,6 +90,7 @@ public: }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { + virtual void anchor(); public: PTX64TargetMachine(const Target &T, StringRef TT, diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 1b85495..bcd8bd2 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(PowerPCCodeGen PPCFrameLowering.cpp PPCJITInfo.cpp PPCMCInstLower.cpp + PPCMachineFunctionInfo.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index b6a0835..000d6d4 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -94,7 +94,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, unsigned Code = MI->getOperand(OpNo).getImm(); if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - default: assert(0 && "Invalid predicate"); case PPC::PRED_ALWAYS: return; // Don't print anything for always. 
case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; @@ -175,7 +174,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, unsigned CCReg = MI->getOperand(OpNo).getReg(); unsigned RegNo; switch (CCReg) { - default: assert(0 && "Unknown CR register"); + default: llvm_unreachable("Unknown CR register"); case PPC::CR0: RegNo = 0; break; case PPC::CR1: RegNo = 1; break; case PPC::CR2: RegNo = 2; break; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 4ed4b76..21fc733 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -1,4 +1,4 @@ -//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===// +//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,8 +24,9 @@ class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) - : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} + PPCInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, + unsigned syntaxVariant) + : MCInstPrinter(MAI, MRI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { return SyntaxVariant == 1; diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index febf438..b674883 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMPowerPCDesc PPCMCAsmInfo.cpp PPCMCCodeEmitter.cpp PPCPredicates.cpp + PPCELFObjectWriter.cpp ) add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 34a5774..02dad45 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -57,13 +57,6 @@ public: MCValue Target, uint64_t &FixedValue) {} }; -class PPCELFObjectWriter : public MCELFObjectTargetWriter { -public: - PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine, - bool HasRelocationAddend, bool isLittleEndian) - : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {} -}; - class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; public: @@ -80,16 +73,16 @@ public: { "fixup_ppc_ha16", 16, 16, 0 }, { "fixup_ppc_lo14", 16, 14, 0 } }; - + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); - + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } - - bool MayNeedRelaxation(const MCInst &Inst) const { + + bool mayNeedRelaxation(const MCInst &Inst) const { // FIXME. return false; } @@ -99,24 +92,23 @@ public: const MCInstFragment *DF, const MCAsmLayout &Layout) const { // FIXME. - assert(0 && "RelaxInstruction() unimplemented"); - return false; + llvm_unreachable("relaxInstruction() unimplemented"); } - - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const { // FIXME. 
- assert(0 && "RelaxInstruction() unimplemented"); + llvm_unreachable("relaxInstruction() unimplemented"); } - - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { // FIXME: Zero fill for now. That's not right, but at least will get the // section size right. for (uint64_t i = 0; i != Count; ++i) OW->Write8(0); return true; - } - + } + unsigned getPointerSize() const { StringRef Name = TheTarget.getName(); if (Name == "ppc64") return 8; @@ -132,12 +124,12 @@ namespace { class DarwinPPCAsmBackend : public PPCAsmBackend { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { - assert(0 && "UNIMP"); + llvm_unreachable("UNIMP"); } - + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; return createMachObjectWriter(new PPCMachObjectWriter( @@ -147,19 +139,19 @@ namespace { object::mach::CSPPC_ALL), OS, /*IsLittleEndian=*/false); } - + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } }; class ELFPPCAsmBackend : public PPCAsmBackend { - Triple::OSType OSType; + uint8_t OSABI; public: - ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) : - PPCAsmBackend(T), OSType(OSType) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : + PPCAsmBackend(T), OSABI(OSABI) { } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -172,17 +164,12 @@ namespace { for (unsigned i = 0; i != 4; ++i) Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); } - + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; - return createELFObjectWriter(new PPCELFObjectWriter( - /*Is64Bit=*/is64, - OSType, - is64 ? 
ELF::EM_PPC64 : ELF::EM_PPC, - /*addend*/ true, /*isLittleEndian*/ false), - OS, /*IsLittleEndian=*/false); + return createPPCELFObjectWriter(OS, is64, OSABI); } - + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } @@ -197,5 +184,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - return new ELFPPCAsmBackend(T, Triple(TT).getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFPPCAsmBackend(T, OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h index 369bbdc..9c975c0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h @@ -1,4 +1,4 @@ -//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===// +//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp new file mode 100644 index 0000000..a197981 --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -0,0 +1,103 @@ +//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + class PPCELFObjectWriter : public MCELFObjectTargetWriter { + public: + PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI); + + virtual ~PPCELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + }; +} + +PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, + Is64Bit ? 
ELF::EM_PPC64 : ELF::EM_PPC, + /*HasRelocationAddend*/ true) {} + +PPCELFObjectWriter::~PPCELFObjectWriter() { +} + +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + // determine the type of the relocation + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_REL24; + break; + case FK_PCRel_4: + Type = ELF::R_PPC_REL32; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_ADDR24; + break; + case PPC::fixup_ppc_brcond14: + Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + break; + case PPC::fixup_ppc_ha16: + Type = ELF::R_PPC_ADDR16_HA; + break; + case PPC::fixup_ppc_lo16: + Type = ELF::R_PPC_ADDR16_LO; + break; + case PPC::fixup_ppc_lo14: + Type = ELF::R_PPC_ADDR14; + break; + case FK_Data_4: + Type = ELF::R_PPC_ADDR32; + break; + case FK_Data_2: + Type = ELF::R_PPC_ADDR16; + break; + } + } + return Type; +} + +void PPCELFObjectWriter:: +adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { + switch ((unsigned)Fixup.getKind()) { + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + RelocOffset += 2; + break; + default: + break; + } +} + +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +} diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index e9424d8..245b457 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===// +//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ #include "PPCMCAsmInfo.h" using namespace llvm; +void PPCMCAsmInfoDarwin::anchor() { } + PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { if (is64Bit) PointerSize = 8; @@ -30,6 +32,8 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { SupportsDebugInformation= true; // Debug information. 
} +void PPCLinuxMCAsmInfo::anchor() { } + PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { if (is64Bit) PointerSize = 8; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 96ae6fb..7b4ed9f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====// +//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,15 @@ namespace llvm { - struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit PPCMCAsmInfoDarwin(bool is64Bit); }; - struct PPCLinuxMCAsmInfo : public MCAsmInfo { + class PPCLinuxMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit PPCLinuxMCAsmInfo(bool is64Bit); }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 262f97c..5a6827f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -57,7 +57,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 7c47051..226fbfe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===// +//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC @@ -107,8 +108,9 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createPPCMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, SyntaxVariant); + return new PPCInstPrinter(MAI, MRI, SyntaxVariant); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index e5bf2a9..b7fa064 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,14 +14,18 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; class Target; class StringRef; +class raw_ostream; extern Target ThePPC32Target; extern Target ThePPC64Target; @@ -31,7 +35,11 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); 
- + +/// createPPCELFObjectWriter - Construct a PPC ELF object writer. +MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI); } // End llvm namespace // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 367f9cc..724374c 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -1,10 +1,10 @@ -//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===// -// +//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This is the top level entry point for the PowerPC target. diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 5dc2d3d..591ae02 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=// +//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===// // // The LLVM Compiler Infrastructure // @@ -39,6 +39,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -49,6 +50,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" #include "InstPrinter/PPCInstPrinter.h" @@ -391,14 +393,26 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { return AsmPrinter::EmitFunctionEntryLabel(); // Emit an official procedure descriptor. - // FIXME 64-bit SVR4: Use MCSection here! - OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\"")); - OutStreamer.EmitRawText(StringRef("\t.align 3")); + const MCSection *Current = OutStreamer.getCurrentSection(); + const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(Section); OutStreamer.EmitLabel(CurrentFnSym); - OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) + - ",.TOC.@tocbase"); - OutStreamer.EmitRawText(StringRef("\t.previous")); - OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":"); + OutStreamer.EmitValueToAlignment(8); + MCSymbol *Symbol1 = + OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); + MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), + Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), + Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.SwitchSection(Current); + + MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( ".L." 
+ Twine(CurrentFnSym->getName())); + OutStreamer.EmitLabel(RealFnSym); + CurrentFnSymForSize = RealFnSym; } @@ -408,8 +422,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TD->getPointerSizeInBits() == 64; if (isPPC64 && !TOC.empty()) { - // FIXME 64-bit SVR4: Use MCSection here? - OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\"")); + const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(Section); // FIXME: This is nondeterminstic! for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 475edf3..5f775e1 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -1,4 +1,4 @@ -//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=// +//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 441db94..8efc9c1 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -1,10 +1,10 @@ //===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This describes the calling conventions for the PowerPC 32- and 64-bit diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 9d2f4d0..252a2d1 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=// +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===// // // The LLVM Compiler Infrastructure // @@ -50,7 +50,7 @@ namespace { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. 
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; MachineRelocation GetRelocation(const MachineOperand &MO, diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 5c45018..6d612f7 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1,4 +1,4 @@ -//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===// +//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -367,8 +367,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Kill) + .addReg(PPC::R1, RegState::Define) .addReg(PPC::R0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) @@ -382,8 +382,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Kill) + .addReg(PPC::R1, RegState::Define) .addReg(PPC::R0); } } else { // PPC64. @@ -400,8 +400,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0) .addImm(NegFrameSize); BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Kill) + .addReg(PPC::X1, RegState::Define) .addReg(PPC::X0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) @@ -415,8 +415,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Kill) + .addReg(PPC::X1, RegState::Define) .addReg(PPC::X0); } } diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 20faa71..d708541 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -1,4 +1,4 @@ -//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==// +//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4a509a3..6651d14 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -602,7 +602,6 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { case ISD::SETULT: return 0; case ISD::SETUGT: return 1; } - return 0; } SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f3a3d17..bfed7ba 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17,7 +17,6 @@ #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -1428,8 +1427,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Lower to a call to 
__trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, - /*isReturnValueUsed=*/true, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1699,7 +1699,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Arguments stored in registers. if (VA.isRegLoc()) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { @@ -1915,12 +1915,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. - ObjSize = Flags.getByValSize(); + unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; @@ -2840,7 +2839,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -4259,8 +4258,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. 
- SmallVector<int, 16> PermMask; - SVOp->getMask(PermMask); + ArrayRef<int> PermMask = SVOp->getMask(); unsigned PFIndexes[4]; bool isFourElementShuffle = true; @@ -4572,7 +4570,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); } - return SDValue(); } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, @@ -4582,8 +4579,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: - assert(false && "Do not know how to custom type legalize this operation!"); - return; + llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::VAARG: { if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() || TM.getSubtarget<PPCSubtarget>().isPPC64()) diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 942f5ee..3534e9c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -437,8 +437,8 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -472,21 +472,21 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_Darwin(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index cdbc264..02bffed 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1,10 +1,10 @@ -//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===// -// +//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the PowerPC 64-bit instructions. 
These patterns are used @@ -106,7 +106,7 @@ let isCall = 1, PPC970_Unit = 7, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; } - let Uses = [CTR8, RM] in { + let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins variable_ops), "bctrl", BrB, @@ -575,7 +575,8 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; - + +let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, @@ -585,6 +586,7 @@ let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; +} def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 256370f..707fa41 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1,10 +1,10 @@ -//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===// -// +//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the Altivec extension to the PowerPC instruction set. diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 84a15b1..d332e2a 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -1,10 +1,10 @@ //===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 6d16f1d..7a8ec40 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===// +//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e90f8cb..e5f171d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -1,4 +1,4 @@ -//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===// +//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC32_INSTRUCTIONINFO_H -#define POWERPC32_INSTRUCTIONINFO_H +#ifndef POWERPC_INSTRUCTIONINFO_H +#define POWERPC_INSTRUCTIONINFO_H #include "PPC.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index d4c9d10..e234012 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1,10 +1,10 @@ -//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===// -// +//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the subset of the 32-bit PowerPC instruction set, as used diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 47ead59..2f8243a 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -1,4 +1,4 @@ -//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===// +//===-- PPCJITInfo.h - PowerPC impl. 
of the JIT interface -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 33af426..276edcb 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -140,7 +140,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, switch (MO.getType()) { default: MI->dump(); - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::CreateReg(MO.getReg()); @@ -166,6 +166,8 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, isDarwin); break; + case MachineOperand::MO_RegisterMask: + continue; } OutMI.addOperand(MCOp); diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp new file mode 100644 index 0000000..6a0aec8 --- /dev/null +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -0,0 +1,15 @@ +//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCMachineFunctionInfo.h" + +using namespace llvm; + +void PPCFunctionInfo::anchor() { } + diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index e2649c8..24caffa 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -21,7 +21,8 @@ namespace llvm { /// PPCFunctionInfo - This class is derived from MachineFunction private /// PowerPC target-specific information for each MachineFunction. class PPCFunctionInfo : public MachineFunctionInfo { -private: + virtual void anchor(); + /// FramePointerSaveIndex - Frame index of where the old frame pointer is /// stored. Also used as an anchor for instructions that need to be altered /// when using frame pointers (dyna_add, dyna_sub.) diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h index 3164e33..17b836d 100644 --- a/lib/Target/PowerPC/PPCPerfectShuffle.h +++ b/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===// +//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 27f7f4a..306cc1f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===// +//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -98,10 +98,10 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { return &PPC::GPRCRegClass; } -const unsigned* +const uint16_t* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // 32-bit Darwin calling convention. 
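The new PPCMachineFunctionInfo.cpp above exists only to give PPCFunctionInfo an out-of-line virtual method. A compilable single-file sketch of the idiom; in the tree the declaration and the empty definition are split across the header and the new .cpp:

#include <cstdio>

// Header side: a polymorphic class whose only out-of-line virtual method
// is the anchor() declared here.
struct Info {
  virtual void anchor();                 // the "key function"
  virtual int kind() const { return 0; } // everything else stays inline
  virtual ~Info() {}
};

// .cpp side: the lone definition pins the vtable and RTTI for Info to one
// translation unit instead of emitting weak copies in every user.
void Info::anchor() {}

int main() {
  Info I;
  std::printf("kind = %d\n", I.kind());
  return 0;
}

Without a key function the vtable is emitted with vague linkage in every translation unit that uses the class; the empty anchor keeps it in a single object file.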
- static const unsigned Darwin32_CalleeSavedRegs[] = { + static const uint16_t Darwin32_CalleeSavedRegs[] = { PPC::R13, PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, @@ -123,7 +123,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { }; // 32-bit SVR4 calling convention. - static const unsigned SVR4_CalleeSavedRegs[] = { + static const uint16_t SVR4_CalleeSavedRegs[] = { PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, @@ -147,7 +147,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { 0 }; // 64-bit Darwin calling convention. - static const unsigned Darwin64_CalleeSavedRegs[] = { + static const uint16_t Darwin64_CalleeSavedRegs[] = { PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, @@ -169,7 +169,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { }; // 64-bit SVR4 calling convention. - static const unsigned SVR4_64_CalleeSavedRegs[] = { + static const uint16_t SVR4_64_CalleeSavedRegs[] = { PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, @@ -299,8 +299,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, DebugLoc dl = MI->getDebugLoc(); if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg). - addImm(CalleeAmt); + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); } else { MachineBasicBlock::iterator MBBI = I; BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) @@ -308,9 +309,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) .addReg(TmpReg, RegState::Kill) .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) - .addReg(StackReg) - .addReg(StackReg) + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) .addReg(TmpReg); } } @@ -407,12 +407,12 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Define) .addReg(MI.getOperand(1).getReg()); else BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Define) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -428,7 +428,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, } else { BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) .addReg(Reg, RegState::Kill) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Define) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -528,7 +528,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, if (DestReg != PPC::CR0) { unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::RLWINM8 : PPC::RLWINM), Reg) .addReg(Reg).addImm(32-ShiftBits).addImm(0) .addImm(31); } @@ -681,7 +681,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); - MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); + MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index faf690f..6ce90bc 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -1,4 +1,4 @@ -//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==// +//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -41,7 +41,7 @@ public: MachineFunction &MF) const; /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 1acdf4e..0e55313 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -1,10 +1,10 @@ -//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===// -// +//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h index a33e7e0..0b392f9 100644 --- a/lib/Target/PowerPC/PPCRelocations.h +++ b/lib/Target/PowerPC/PPCRelocations.h @@ -1,4 +1,4 @@ -//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===// +//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPC32RELOCATIONS_H -#define PPC32RELOCATIONS_H +#ifndef PPCRELOCATIONS_H +#define PPCRELOCATIONS_H #include "llvm/CodeGen/MachineRelocation.h" diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 69e435b..4e37d0a 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -1,10 +1,10 @@ -//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===// -// +//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
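The getCalleeSavedRegs tables above shrink from unsigned to uint16_t. A hedged sketch of the shape of these null-terminated tables, with made-up register numbers standing in for the real PPC::R14 and friends:

#include <cstdint>
#include <cstdio>

// Illustrative register numbers only; the real values come from the
// generated register enumerators.
enum : uint16_t { R14 = 14, R15 = 15, R16 = 16 };

// Null-terminated, exactly like the tables above; uint16_t halves the
// static footprint since physical register numbers always fit 16 bits.
static const uint16_t CalleeSavedRegs[] = { R14, R15, R16, 0 };

static const uint16_t *getCalleeSavedRegs() { return CalleeSavedRegs; }

int main() {
  // Callers iterate until the 0 sentinel, the same as before the change.
  for (const uint16_t *R = getCalleeSavedRegs(); *R; ++R)
    std::printf("callee-saved r%u\n", (unsigned)*R);
  return 0;
}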
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 94ee9bd..76f7465 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -1,10 +1,10 @@ -//===- PPCSchedule440.td - PPC 440 Scheduling Definitions --*- tablegen -*-===// -// +//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // Primary reference: diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index ad4da1f..e7e5498 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G3 (750) processor. diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 03c3b29..87a3151 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G4 (7400) processor. diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 00cac3c..f76557a 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===// -// +//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G4+ (7450) processor. diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 1671f22..bc0820b 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G5 (970) processor. diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index baa0eb5..c89fab3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -1,4 +1,4 @@ -//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===// +//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 62b2424..69fe50b 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -1,4 +1,4 @@ -//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====// +//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 8e71c46..da20274 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -15,6 +15,7 @@ #include "PPCTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -22,7 +23,7 @@ using namespace llvm; extern "C" void LLVMInitializePowerPCTarget() { // Register the targets - RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); + RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); } @@ -40,11 +41,9 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, InstrItins(Subtarget.getInstrItineraryData()) { } -/// Override this for PowerPC. Tail merging happily breaks up instruction issue -/// groups, which typically degrades performance. -bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } +void PPC32TargetMachine::anchor() { } -PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, +PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, @@ -52,8 +51,9 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } +void PPC64TargetMachine::anchor() { } -PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, +PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, @@ -66,13 +66,39 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool PPCTargetMachine::addInstSelector(PassManagerBase &PM) { +namespace { +/// PPC Code Generator Pass Configuration Options. 
+class PPCPassConfig : public TargetPassConfig { +public: + PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + PPCTargetMachine &getPPCTargetMachine() const { + return getTM<PPCTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { + TargetPassConfig *PassConfig = new PPCPassConfig(this, PM); + + // Override this for PowerPC. Tail merging happily breaks up instruction issue + // groups, which typically degrades performance. + PassConfig->setEnableTailMerge(false); + + return PassConfig; +} + +bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - PM.add(createPPCISelDag(*this)); + PM.add(createPPCISelDag(getPPCTargetMachine())); return false; } -bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM) { +bool PPCPassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. PM.add(createPPCBranchSelectionPass()); return false; @@ -84,12 +110,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. - Options.DisableJumpTables = true; - + Options.DisableJumpTables = true; + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? Subtarget.SetJITMode(); - + // Machine code emitter pass for PowerPC. PM.add(createPPCJITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 0427876..6dd11c9 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -1,4 +1,4 @@ -//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=// +//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -50,7 +50,7 @@ public: return &FrameLowering; } virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual const PPCTargetLowering *getTargetLowering() const { + virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { @@ -59,24 +59,23 @@ public: virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - + virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData *getInstrItineraryData() const { + virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); - virtual bool getEnableTailMergeDefault() const; }; /// PPC32TargetMachine - PowerPC 32-bit target machine. /// class PPC32TargetMachine : public PPCTargetMachine { + virtual void anchor(); public: PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -87,6 +86,7 @@ public: /// PPC64TargetMachine - PowerPC 64-bit target machine. 
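The addInstSelector/addPreEmitPass hooks move off the TargetMachine onto PPCPassConfig, and the old getEnableTailMergeDefault() override becomes a setEnableTailMerge(false) call inside createPassConfig. A schematic, compilable mimic of that control flow, using simplified stand-in names rather than the real TargetPassConfig API from llvm/CodeGen/Passes.h:

#include <cstdio>

struct PassConfig {
  bool EnableTailMerge;
  PassConfig() : EnableTailMerge(true) {}
  void setEnableTailMerge(bool V) { EnableTailMerge = V; }
  virtual bool addInstSelector() { return false; }
  virtual ~PassConfig() {}
};

struct PPCLikeTargetMachine {
  PassConfig *createPassConfig() {
    PassConfig *PC = new PassConfig();
    // Tail merging breaks up PPC instruction issue groups, so the target
    // turns it off when handing out its pass configuration, replacing the
    // old virtual default on the target machine.
    PC->setEnableTailMerge(false);
    return PC;
  }
};

int main() {
  PPCLikeTargetMachine TM;
  PassConfig *PC = TM.createPassConfig();
  std::printf("tail merge enabled: %d\n", (int)PC->EnableTailMerge); // 0
  delete PC;
  return 0;
}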
/// class PPC64TargetMachine : public PPCTargetMachine { + virtual void anchor(); public: PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 56ee7c2..ae4af0f 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(SparcCodeGen SparcISelDAGToDAG.cpp SparcISelLowering.cpp SparcFrameLowering.cpp + SparcMachineFunctionInfo.cpp SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 9295408..883aa3a 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -282,7 +282,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) if (RegSet.count(Reg)) return true; // check Aliased Registers - for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg); *Alias; ++ Alias) if (RegSet.count(*Alias)) return true; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 6a7e090..f5e10fc 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -16,6 +16,8 @@ using namespace llvm; +void SparcELFMCAsmInfo::anchor() { } + SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) { IsLittleEndian = false; Triple TheTriple(TT); diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 0cb6827..616e1c5 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====// +//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -20,7 +20,9 @@ namespace llvm { class Target; - struct SparcELFMCAsmInfo : public MCAsmInfo { + class SparcELFMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit SparcELFMCAsmInfo(const Target &T, StringRef TT); }; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index eda04c3..7fdb0c3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===// +//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index 7b2c614..ce6ae17 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -18,7 +18,6 @@ #include "MCTargetDesc/SparcMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include <cassert> namespace llvm { class FunctionPass; @@ -74,7 +73,6 @@ namespace llvm { inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { switch (CC) { - default: llvm_unreachable("Unknown condition 
code"); case SPCC::ICC_NE: return "ne"; case SPCC::ICC_E: return "e"; case SPCC::ICC_G: return "g"; @@ -103,7 +101,8 @@ namespace llvm { case SPCC::FCC_LE: return "le"; case SPCC::FCC_ULE: return "ule"; case SPCC::FCC_O: return "o"; - } + } + llvm_unreachable("Invalid cond code"); } } // end namespace llvm #endif diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 7643366..611f8e8 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -1,10 +1,10 @@ -//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===// -// +//===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 7548bbf..c14b3d4 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -62,6 +62,8 @@ namespace { virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -140,7 +142,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, std::string operand = ""; const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { - default: assert(0 && "Operand is not a register "); + default: llvm_unreachable("Operand is not a register"); case MachineOperand::MO_Register: assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Operand is not a physical register "); @@ -241,7 +243,14 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return I == Pred->end() || !I->isBarrier(); } - +MachineLocation SparcAsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + assert(MI->getNumOperands() == 4 && "Invalid number of operands!"); + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Unexpected MachineOperand types"); + return MachineLocation(MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); +} // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index 856f87a..d471220 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -1,10 +1,10 @@ -//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===// -// +//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This describes the calling conventions for the Sparc architectures. 
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 320c8ca..1c5c89e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -1,4 +1,4 @@ -//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====// +//===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 9a2ddc8..210705e 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -1,4 +1,4 @@ -//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===// +//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 8c6103d..93710c4 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -176,7 +176,6 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { MulLHS, MulRHS); // The high part is in the Y register. return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1)); - return NULL; } } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 3608d3b..a6b63fb 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -348,7 +347,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 8a1886a..4a7c479 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -77,9 +77,8 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td index 6535259..dce3312 100644 --- a/lib/Target/Sparc/SparcInstrFormats.td +++ b/lib/Target/Sparc/SparcInstrFormats.td @@ -1,10 +1,10 @@ -//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===// -// +//===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction { diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 5290d42..faff468 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===// +//===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -79,7 +79,6 @@ static bool IsIntegerCC(unsigned CC) static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) { switch(CC) { - default: llvm_unreachable("Unknown condition code"); case SPCC::ICC_NE: return SPCC::ICC_E; case SPCC::ICC_E: return SPCC::ICC_NE; case SPCC::ICC_G: return SPCC::ICC_LE; @@ -110,6 +109,18 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) case SPCC::FCC_NE: return SPCC::FCC_E; case SPCC::FCC_E: return SPCC::FCC_NE; } + llvm_unreachable("Invalid cond code"); +} + +MachineInstr * +SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc dl) const { + MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; } diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index eda64ef..4932531 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -1,4 +1,4 @@ -//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===// +//===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -62,6 +62,13 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// emitFrameIndexDebugValue - Emit a target-dependent form of + /// DBG_VALUE encoding the address of a frame index. + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc dl) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index cf5c48f..15541ef 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -1,10 +1,10 @@ -//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===// -// +//===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the Sparc instructions in TableGen format. diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp new file mode 100644 index 0000000..e744282 --- /dev/null +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "SparcMachineFunctionInfo.h" + +using namespace llvm; + +void SparcMachineFunctionInfo::anchor() { } diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 0b74308..90c27a4 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -18,6 +18,7 @@ namespace llvm { class SparcMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); private: unsigned GlobalBaseReg; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 8c16251..c392fcc 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===// +//===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -33,9 +33,9 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st, : SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) { } -const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) +const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const unsigned CalleeSavedRegs[] = { 0 }; + static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } @@ -118,10 +118,8 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned SparcRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); - return 0; } unsigned SparcRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); - return 0; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index f845667..9515ad3 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -1,4 +1,4 @@ -//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===// +//===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -32,7 +32,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii); /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index cf92829..81bff6c 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -1,10 +1,10 @@ -//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===// -// +//===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -39,6 +39,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { let Num = num; let SubRegs = subregs; let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; } // Control Registers diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 6c501cf..e5b2aeb 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -1,4 +1,4 @@ -//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===// +//===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -21,6 +21,8 @@ using namespace llvm; +void SparcSubtarget::anchor() { } + SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : SparcGenSubtargetInfo(TT, CPU, FS), diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 00a04c3..a81931b 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -1,4 +1,4 @@ -//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====// +//===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ namespace llvm { class StringRef; class SparcSubtarget : public SparcGenSubtargetInfo { + virtual void anchor(); bool IsV9; bool V8DeprecatedInsts; bool IsVIS; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 8e16fd7..80a3be6 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -13,6 +13,7 @@ #include "Sparc.h" #include "SparcTargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -24,7 +25,7 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine ctor - Create an ILP32 architecture model /// -SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, +SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, @@ -37,20 +38,42 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget) { } -bool SparcTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createSparcISelDag(*this)); +namespace { +/// Sparc Code Generator Pass Configuration Options. +class SparcPassConfig : public TargetPassConfig { +public: + SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + SparcTargetMachine &getSparcTargetMachine() const { + return getTM<SparcTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SparcPassConfig(this, PM); +} + +bool SparcPassConfig::addInstSelector() { + PM.add(createSparcISelDag(getSparcTargetMachine())); return false; } /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. 
-bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){ - PM.add(createSparcFPMoverPass(*this)); - PM.add(createSparcDelaySlotFillerPass(*this)); +bool SparcPassConfig::addPreEmitPass(){ + PM.add(createSparcFPMoverPass(getSparcTargetMachine())); + PM.add(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } +void SparcV8TargetMachine::anchor() { } + SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -61,7 +84,9 @@ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } -SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, +void SparcV9TargetMachine::anchor() { } + +SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index cedc1e3..b203dfa 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -55,13 +55,13 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM); - virtual bool addPreEmitPass(PassManagerBase &PM); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; /// SparcV8TargetMachine - Sparc 32-bit target machine /// class SparcV8TargetMachine : public SparcTargetMachine { + virtual void anchor(); public: SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -73,6 +73,7 @@ public: /// SparcV9TargetMachine - Sparc 64-bit target machine /// class SparcV9TargetMachine : public SparcTargetMachine { + virtual void anchor(); public: SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index ff60e0b..3acb4dd 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -147,8 +147,10 @@ void TargetData::init() { setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16 setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32 setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64 + setAlignment(FLOAT_ALIGN, 2, 2, 16); // half setAlignment(FLOAT_ALIGN, 4, 4, 32); // float setAlignment(FLOAT_ALIGN, 8, 8, 64); // double + setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ... setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ... setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct @@ -477,6 +479,8 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const { return cast<IntegerType>(Ty)->getBitWidth(); case Type::VoidTyID: return 8; + case Type::HalfTyID: + return 16; case Type::FloatTyID: return 32; case Type::DoubleTyID: @@ -493,9 +497,7 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const { return cast<VectorType>(Ty)->getBitWidth(); default: llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type"); - break; } - return 0; } /*! 
@@ -534,6 +536,7 @@ unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::VoidTyID: AlignType = INTEGER_ALIGN; break; + case Type::HalfTyID: case Type::FloatTyID: case Type::DoubleTyID: // PPC_FP128TyID and FP128TyID have different data contents, but the @@ -549,7 +552,6 @@ break; default: llvm_unreachable("Bad type for getAlignment!!!"); - break; } return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty), diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp new file mode 100644 index 0000000..aafedf8 --- /dev/null +++ b/lib/Target/TargetJITInfo.cpp @@ -0,0 +1,14 @@ +//===-- Target/TargetJITInfo.cpp - Target Information for JIT -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetJITInfo.h" + +using namespace llvm; + +void TargetJITInfo::anchor() { } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 768facb..269958f 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -20,6 +20,8 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo", "Target Library Information", false, true) char TargetLibraryInfo::ID = 0; +void TargetLibraryInfo::anchor() { } + const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { "acos", @@ -111,7 +113,11 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "tanhf", "trunc", "truncf", - "truncl" + "truncl", + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release" }; /// initialize - Initialize the set of available library functions based on the diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index fc8b67b..1589604 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -73,31 +73,27 @@ static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) { /// IsNullTerminatedString - Return true if the specified constant (which is /// known to have a type that is an array of 1/2/4 byte elements) ends with a -/// nul value and contains no other nuls in it. +/// nul value and contains no other nuls in it. Note that this is more general +/// than ConstantDataSequential::isString because we allow 2 & 4 byte strings. static bool IsNullTerminatedString(const Constant *C) { - ArrayType *ATy = cast<ArrayType>(C->getType()); - - // First check: is we have constant array of i8 terminated with zero - if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) { - if (ATy->getNumElements() == 0) return false; - - ConstantInt *Null = - dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1)); - if (Null == 0 || !Null->isZero()) + // First check: do we have a constant array terminated with zero + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) { + unsigned NumElts = CDS->getNumElements(); + assert(NumElts != 0 && "Can't have an empty CDS"); + + if (CDS->getElementAsInteger(NumElts-1) != 0) return false; // Not null terminated. - + // Verify that the null doesn't occur anywhere else in the string. - for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i) - // Reject constantexpr elements etc.
- if (!isa<ConstantInt>(CVA->getOperand(i)) || - CVA->getOperand(i) == Null) + for (unsigned i = 0; i != NumElts-1; ++i) + if (CDS->getElementAsInteger(i) == 0) return false; return true; } // Another possibility: [1 x i8] zeroinitializer if (isa<ConstantAggregateZero>(C)) - return ATy->getNumElements() == 1; + return cast<ArrayType>(C->getType())->getNumElements() == 1; return false; } @@ -160,7 +156,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // relocation, then we may have to drop this into a writable data section // even though it is marked const. switch (C->getRelocationInfo()) { - default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: // If the global is required to have a unique address, it can't be put // into a mergable section: just drop it into the general read-only @@ -234,7 +229,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getDataNoRel(); switch (C->getRelocationInfo()) { - default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: return SectionKind::getDataNoRel(); case Constant::LocalRelocation: @@ -242,6 +236,7 @@ case Constant::GlobalRelocations: return SectionKind::getDataRel(); } + llvm_unreachable("Invalid relocation"); } /// SectionForGlobal - This method computes the appropriate section to emit diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index fb7bbbb..b4969ca 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -22,7 +22,6 @@ using namespace llvm; // namespace llvm { - bool StrongPHIElim; bool HasDivModLibcall; bool AsmVerbosityDefault(false); } @@ -35,7 +34,7 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); - + //--------------------------------------------------------------------------- // TargetMachine Class // diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 2689837..1716423 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -71,7 +71,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { /// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ - ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF); + ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF); for (unsigned i = 0; i != Order.size(); ++i) R.set(Order[i]); } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f4639a3..d91830f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -31,10 +31,9 @@ using namespace llvm; namespace { struct X86Operand; -class X86ATTAsmParser : public MCTargetAsmParser { +class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; - private: MCAsmParser &getParser() const { return Parser; } @@ -45,12 +44,24 @@ private: return Parser.Error(L, Msg, Ranges); } + X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { + Error(Loc, Msg); + return 0; + } + X86Operand *ParseOperand(); + X86Operand *ParseATTOperand(); + X86Operand *ParseIntelOperand(); + X86Operand *ParseIntelMemOperand(); + X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); + bool processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -81,7 +92,7 @@ private: /// } public: - X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) + X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. @@ -93,6 +104,10 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); + + bool isParsingIntelSyntax() { + return getParser().getAssemblerDialect(); + } }; } // end anonymous namespace @@ -103,6 +118,31 @@ static unsigned MatchRegisterName(StringRef Name); /// } +static bool isImmSExti16i8Value(uint64_t Value) { + return (( Value <= 0x000000000000007FULL)|| + (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); +} + +static bool isImmSExti32i8Value(uint64_t Value) { + return (( Value <= 0x000000000000007FULL)|| + (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); +} + +static bool isImmZExtu32u8Value(uint64_t Value) { + return (Value <= 0x00000000000000FFULL); +} + +static bool isImmSExti64i8Value(uint64_t Value) { + return (( Value <= 0x000000000000007FULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); +} + +static bool isImmSExti64i32Value(uint64_t Value) { + return (( Value <= 0x000000007FFFFFFFULL)|| + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); +} namespace { /// X86Operand - Instances of this class represent a parsed X86 machine @@ -137,6 +177,7 @@ struct X86Operand : public MCParsedAsmOperand { unsigned BaseReg; unsigned IndexReg; unsigned Scale; + unsigned Size; } Mem; }; @@ -209,10 +250,7 @@ struct X86Operand : public MCParsedAsmOperand { // Otherwise, check the value is in a range that makes sense for this // extension. 
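The isImmSExti*i*Value helpers hoisted out of the X86 operand predicates encode "fits after sign extension" as three unsigned ranges over the raw 64-bit pattern. A self-contained check of the i16/i8 case, with the ranges copied verbatim from the parser:

#include <cassert>
#include <cstdint>

// A value sign-extends from i8 to i16 iff it is 0..127, the i16 bit
// pattern of -128..-1 (0xFF80..0xFFFF), or the i64 bit pattern of
// -128..-1 (0xFFFFFFFFFFFFFF80..0xFFFFFFFFFFFFFFFF).
static bool isImmSExti16i8Value(uint64_t Value) {
  return (( Value <= 0x000000000000007FULL)||
          (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
          (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}

int main() {
  assert(isImmSExti16i8Value(127));          // largest positive i8
  assert(isImmSExti16i8Value(0xFF80));       // -128 seen as an i16 pattern
  assert(isImmSExti16i8Value((uint64_t)-1)); // -1 seen as an i64 pattern
  assert(!isImmSExti16i8Value(128));         // does not fit a signed i8
  return 0;
}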
- uint64_t Value = CE->getValue(); - return (( Value <= 0x000000000000007FULL)|| - (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isImmSExti16i8Value(CE->getValue()); } bool isImmSExti32i8() const { if (!isImm()) @@ -226,10 +264,7 @@ struct X86Operand : public MCParsedAsmOperand { // Otherwise, check the value is in a range that makes sense for this // extension. - uint64_t Value = CE->getValue(); - return (( Value <= 0x000000000000007FULL)|| - (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isImmSExti32i8Value(CE->getValue()); } bool isImmZExtu32u8() const { if (!isImm()) @@ -243,8 +278,7 @@ struct X86Operand : public MCParsedAsmOperand { // Otherwise, check the value is in a range that makes sense for this // extension. - uint64_t Value = CE->getValue(); - return (Value <= 0x00000000000000FFULL); + return isImmZExtu32u8Value(CE->getValue()); } bool isImmSExti64i8() const { if (!isImm()) @@ -258,9 +292,7 @@ struct X86Operand : public MCParsedAsmOperand { // Otherwise, check the value is in a range that makes sense for this // extension. - uint64_t Value = CE->getValue(); - return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isImmSExti64i8Value(CE->getValue()); } bool isImmSExti64i32() const { if (!isImm()) @@ -274,12 +306,31 @@ struct X86Operand : public MCParsedAsmOperand { // Otherwise, check the value is in a range that makes sense for this // extension. - uint64_t Value = CE->getValue(); - return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isImmSExti64i32Value(CE->getValue()); } bool isMem() const { return Kind == Memory; } + bool isMem8() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 8); + } + bool isMem16() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 16); + } + bool isMem32() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32); + } + bool isMem64() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64); + } + bool isMem80() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 80); + } + bool isMem128() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 128); + } + bool isMem256() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 256); + } bool isAbsMem() const { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && @@ -306,6 +357,28 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } + void addMem8Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem16Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem80Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem128Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem256Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); @@ -317,7 +390,11 @@ struct X86Operand : public MCParsedAsmOperand { 
void addAbsMemOperands(MCInst &Inst, unsigned N) const { assert((N == 1) && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp())) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -342,20 +419,22 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, - SMLoc EndLoc) { + SMLoc EndLoc, unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; Res->Mem.BaseReg = 0; Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; + Res->Mem.Size = Size; return Res; } /// Create a generalized memory operand. static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { + unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -369,13 +448,14 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.BaseReg = BaseReg; Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; + Res->Mem.Size = Size; return Res; } }; } // end anonymous namespace. -bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { +bool X86AsmParser::isSrcOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; return (Op.isMem() && @@ -385,7 +465,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); } -bool X86ATTAsmParser::isDstOp(X86Operand &Op) { +bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; return Op.isMem() && Op.Mem.SegReg == X86::ES && @@ -394,18 +474,22 @@ bool X86ATTAsmParser::isDstOp(X86Operand &Op) { Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; } -bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, - SMLoc &StartLoc, SMLoc &EndLoc) { +bool X86AsmParser::ParseRegister(unsigned &RegNo, + SMLoc &StartLoc, SMLoc &EndLoc) { RegNo = 0; - const AsmToken &TokPercent = Parser.getTok(); - assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); - StartLoc = TokPercent.getLoc(); - Parser.Lex(); // Eat percent token. + if (!isParsingIntelSyntax()) { + const AsmToken &TokPercent = Parser.getTok(); + assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); + StartLoc = TokPercent.getLoc(); + Parser.Lex(); // Eat percent token. + } const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) + if (Tok.isNot(AsmToken::Identifier)) { + if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", SMRange(StartLoc, Tok.getEndLoc())); + } RegNo = MatchRegisterName(Tok.getString()); @@ -485,16 +569,182 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, } } - if (RegNo == 0) + if (RegNo == 0) { + if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", SMRange(StartLoc, Tok.getEndLoc())); + } EndLoc = Tok.getEndLoc(); Parser.Lex(); // Eat identifier token. 
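// Note: in Intel mode the failure paths above return true *without* calling
// Error(), so callers such as ParseIntelOperand() and
// ParseIntelBracExpression() can probe for a register and quietly fall back
// to parsing the token as an expression instead.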
return false; } -X86Operand *X86ATTAsmParser::ParseOperand() { +X86Operand *X86AsmParser::ParseOperand() { + if (isParsingIntelSyntax()) + return ParseIntelOperand(); + return ParseATTOperand(); +} + +/// getIntelMemOperandSize - Return the Intel memory operand size. +static unsigned getIntelMemOperandSize(StringRef OpStr) { + unsigned Size = 0; + if (OpStr == "BYTE") Size = 8; + if (OpStr == "WORD") Size = 16; + if (OpStr == "DWORD") Size = 32; + if (OpStr == "QWORD") Size = 64; + if (OpStr == "XWORD") Size = 80; + if (OpStr == "XMMWORD") Size = 128; + if (OpStr == "YMMWORD") Size = 256; + return Size; +} + +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, + unsigned Size) { + unsigned BaseReg = 0, IndexReg = 0, Scale = 1; + SMLoc Start = Parser.getTok().getLoc(), End; + + const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] + + // Eat '[' + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(Start, "Expected '[' token!"); + Parser.Lex(); + + if (getLexer().is(AsmToken::Identifier)) { + // Parse BaseReg + if (ParseRegister(BaseReg, Start, End)) { + // Handle '[' 'symbol' ']' + if (getParser().ParseExpression(Disp, End)) return 0; + if (getLexer().isNot(AsmToken::RBrac)) + return ErrorOperand(Start, "Expected ']' token!"); + Parser.Lex(); + return X86Operand::CreateMem(Disp, Start, End, Size); + } + } else if (getLexer().is(AsmToken::Integer)) { + int64_t Val = Parser.getTok().getIntVal(); + Parser.Lex(); + SMLoc Loc = Parser.getTok().getLoc(); + if (getLexer().is(AsmToken::RBrac)) { + // Handle '[' number ']' + Parser.Lex(); + const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); + if (SegReg) + return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, + Start, End, Size); + return X86Operand::CreateMem(Disp, Start, End, Size); + } else if (getLexer().is(AsmToken::Star)) { + // Handle '[' Scale*IndexReg ']' + Parser.Lex(); + SMLoc IdxRegLoc = Parser.getTok().getLoc(); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); + Scale = Val; + } else + return ErrorOperand(Loc, "Unexpected token"); + } + + if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { + bool isPlus = getLexer().is(AsmToken::Plus); + Parser.Lex(); + SMLoc PlusLoc = Parser.getTok().getLoc(); + if (getLexer().is(AsmToken::Integer)) { + int64_t Val = Parser.getTok().getIntVal(); + Parser.Lex(); + if (getLexer().is(AsmToken::Star)) { + Parser.Lex(); + SMLoc IdxRegLoc = Parser.getTok().getLoc(); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); + Scale = Val; + } else if (getLexer().is(AsmToken::RBrac)) { + const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); + Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); + } else + return ErrorOperand(PlusLoc, "unexpected token after +"); + } else if (getLexer().is(AsmToken::Identifier)) { + // This could be an index register or a displacement expression.
+ End = Parser.getTok().getLoc(); + if (!IndexReg) + ParseRegister(IndexReg, Start, End); + else if (getParser().ParseExpression(Disp, End)) return 0; + } + } + + if (getLexer().isNot(AsmToken::RBrac)) + if (getParser().ParseExpression(Disp, End)) return 0; + + End = Parser.getTok().getLoc(); + if (getLexer().isNot(AsmToken::RBrac)) + return ErrorOperand(End, "expected ']' token!"); + Parser.Lex(); + End = Parser.getTok().getLoc(); + + // handle [-42] + if (!BaseReg && !IndexReg) + return X86Operand::CreateMem(Disp, Start, End, Size); + + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, + Start, End, Size); +} + +/// ParseIntelMemOperand - Parse an Intel-style memory operand. +X86Operand *X86AsmParser::ParseIntelMemOperand() { + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Parser.getTok().getLoc(), End; + unsigned SegReg = 0; + + unsigned Size = getIntelMemOperandSize(Tok.getString()); + if (Size) { + Parser.Lex(); + assert (Tok.getString() == "PTR" && "Unexpected token!"); + Parser.Lex(); + } + + if (getLexer().is(AsmToken::LBrac)) + return ParseIntelBracExpression(SegReg, Size); + + if (!ParseRegister(SegReg, Start, End)) { + // Handle SegReg : [ ... ] + if (getLexer().isNot(AsmToken::Colon)) + return ErrorOperand(Start, "Expected ':' token!"); + Parser.Lex(); // Eat : + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(Start, "Expected '[' token!"); + return ParseIntelBracExpression(SegReg, Size); + } + + const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); + if (getParser().ParseExpression(Disp, End)) return 0; + return X86Operand::CreateMem(Disp, Start, End, Size); +} + +X86Operand *X86AsmParser::ParseIntelOperand() { + SMLoc Start = Parser.getTok().getLoc(), End; + + // immediate. + if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || + getLexer().is(AsmToken::Minus)) { + const MCExpr *Val; + if (!getParser().ParseExpression(Val, End)) { + End = Parser.getTok().getLoc(); + return X86Operand::CreateImm(Val, Start, End); + } + } + + // register + unsigned RegNo = 0; + if (!ParseRegister(RegNo, Start, End)) { + End = Parser.getTok().getLoc(); + return X86Operand::CreateReg(RegNo, Start, End); + } + + // mem operand + return ParseIntelMemOperand(); +} + +X86Operand *X86AsmParser::ParseATTOperand() { switch (getLexer().getKind()) { default: // Parse a memory operand with no segment register. @@ -533,7 +783,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() { /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix /// has already been parsed if present. -X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { +X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The @@ -664,7 +914,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { MemStart, MemEnd); } -bool X86ATTAsmParser:: +bool X86AsmParser:: ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { StringRef PatchedName = Name; @@ -734,10 +984,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); - if (ExtraImmOp) + if (ExtraImmOp && !isParsingIntelSyntax()) Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); - // Determine whether this is an instruction prefix.
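Taken together, the new Intel routines accept the usual MASM-style operand shapes, for example (operands are illustrative): DWORD PTR [ebx + 4*esi + 16], BYTE PTR [my_symbol], [8*ecx], [-42], and the SegReg : [ ... ] form. The keyword-to-size mapping in getIntelMemOperandSize() could equally be table-driven; a sketch using llvm::StringSwitch, which the tree already provides:

    #include "llvm/ADT/StringSwitch.h"

    // Equivalent, lookup-style formulation of getIntelMemOperandSize().
    static unsigned getIntelMemOperandSize(StringRef OpStr) {
      return llvm::StringSwitch<unsigned>(OpStr)
          .Case("BYTE", 8).Case("WORD", 16).Case("DWORD", 32)
          .Case("QWORD", 64).Case("XWORD", 80)
          .Case("XMMWORD", 128).Case("YMMWORD", 256)
          .Default(0);   // not a size keyword
    }

Note that the 80-bit keyword is spelled XWORD here, where MASM itself typically uses TBYTE.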
bool isPrefix = Name == "lock" || Name == "rep" || @@ -791,6 +1040,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, else if (isPrefix && getLexer().is(AsmToken::Slash)) Parser.Lex(); // Consume the prefix separator Slash + if (ExtraImmOp && isParsingIntelSyntax()) + Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> // "outb %al, %dx". Out doesn't take a memory form, but this is a widely // documented form in various unofficial manuals, so a lot of code uses it. @@ -926,11 +1178,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Name.startswith("rcl") || Name.startswith("rcr") || Name.startswith("rol") || Name.startswith("ror")) && Operands.size() == 3) { - X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); - if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && - cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + if (isParsingIntelSyntax()) { + // Intel syntax + X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); + if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && + cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { + delete Operands[2]; + Operands.pop_back(); + } + } else { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && + cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); + } } } @@ -949,7 +1211,246 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, return false; } -bool X86ATTAsmParser:: +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::AND16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::AND32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::AND32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::AND64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::AND64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::XOR16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::XOR16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::XOR32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::XOR32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + 
TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::XOR64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::XOR64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::OR16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::OR16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::OR32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::OR32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::OR64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::OR64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::CMP16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::CMP16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::CMP32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::CMP32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::CMP64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::CMP64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::ADD16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::ADD16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::ADD32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::ADD32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::ADD64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::ADD64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::SUB16i16: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::SUB16ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::SUB32i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::SUB32ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + case X86::SUB64i32: { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; + + MCInst TmpInst; + TmpInst.setOpcode(X86::SUB64ri8); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; + } + } +} + +bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { @@ -967,6 +1468,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, Op->getToken() == "fstenv" || Op->getToken() == "fclex") { MCInst Inst; Inst.setOpcode(X86::WAIT); + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); const char *Repl = @@ -990,9 +1492,17 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; // First, try a direct match. - switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo, + isParsingIntelSyntax())) { default: break; case Match_Success: + // Some instructions need post-processing to, for example, tweak which + // encoding is selected. Loop on it while changes happen so the + // individual transformations can chain off each other. 
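// For example: "and eax, 12" first matches AND32i32 (25 0c 00 00 00, five
// bytes); because the immediate passes isImmSExti32i8Value, one pass of
// processInstruction rewrites it to AND32ri8 (83 e0 0c, three bytes). The
// 16-bit pairs are the same length either way (four bytes with the 0x66
// prefix) but are rewritten for consistency.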
+ while (processInstruction(Inst, Operands)) + ; + + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; case Match_MissingFeature: @@ -1050,6 +1560,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match1 == Match_Success) + (Match2 == Match_Success) + (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { + Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; } @@ -1130,18 +1641,29 @@ MatchAndEmitInstruction(SMLoc IDLoc, } -bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { +bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(2, DirectiveID.getLoc()); else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); + else if (IDVal.startswith(".intel_syntax")) { + getParser().setAssemblerDialect(1); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if(Parser.getTok().getString() == "noprefix") { + // FIXME : Handle noprefix + Parser.Lex(); + } else + return true; + } + return false; + } return true; } /// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] -bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { +bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -1166,7 +1688,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { /// ParseDirectiveCode /// ::= .code32 | .code64 -bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { +bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { if (IDVal == ".code32") { Parser.Lex(); if (is64BitMode()) { @@ -1191,8 +1713,8 @@ extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. 
extern "C" void LLVMInitializeX86AsmParser() { - RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target); - RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target); + RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); + RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index be15899..f612e23 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -26,6 +26,7 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MCInstLower.cpp + X86MachineFunctionInfo.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt index cac7adf..0609f3c 100644 --- a/lib/Target/X86/Disassembler/LLVMBuild.txt +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Disassembler parent = X86 -required_libraries = MC Support X86Info +required_libraries = MC Support X86Desc X86Info add_to_library_groups = X86 diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 3aacb20e..8278bde 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1,4 +1,4 @@ -//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===// +//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,11 @@ #include "X86DisassemblerDecoder.h" #include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" @@ -42,6 +44,11 @@ void x86DisassemblerDebug(const char *file, dbgs() << file << ":" << line << ": " << s; } +const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) { + const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); + return MII->getName(Opcode); +} + #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); namespace llvm { @@ -65,17 +72,19 @@ extern Target TheX86_32Target, TheX86_64Target; } static bool translateInstruction(MCInst &target, - InternalInstruction &source); + InternalInstruction &source, + const MCDisassembler *Dis); -X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : - MCDisassembler(STI), - fMode(mode) { -} +X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, + DisassemblerMode mode, + const MCInstrInfo *MII) + : MCDisassembler(STI), MII(MII), fMode(mode) {} X86GenericDisassembler::~X86GenericDisassembler() { + delete MII; } -EDInstInfo *X86GenericDisassembler::getEDInfo() const { +const EDInstInfo *X86GenericDisassembler::getEDInfo() const { return instInfoX86; } @@ -116,6 +125,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr, uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const { + CommentStream = &cStream; + InternalInstruction internalInstr; dlog_t loggerFn = logger; @@ -127,6 +138,7 @@ X86GenericDisassembler::getInstruction(MCInst &instr, (void*)®ion, loggerFn, (void*)&vStream, + (void*)MII, address, fMode); @@ -136,7 +148,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr, } else { size = internalInstr.length; - return 
(!translateInstruction(instr, internalInstr)) ? Success : Fail; + return (!translateInstruction(instr, internalInstr, this)) ? + Success : Fail; } } @@ -161,6 +174,140 @@ static void translateRegister(MCInst &mcInst, Reg reg) { mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); } +/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the +/// immediate Value in the MCInst. +/// +/// @param Value - The immediate Value; any PC adjustment has been made by +/// the caller. +/// @param isBranch - If the instruction is a branch instruction +/// @param Address - The starting address of the instruction +/// @param Offset - The byte offset to this immediate in the instruction +/// @param Width - The byte width of this immediate in the instruction +/// +/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was +/// called then that function is called to get any symbolic information for the +/// immediate in the instruction using the Address, Offset and Width. If that +/// returns non-zero then the symbolic information it returns is used to create +/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() +/// returns zero and isBranch is true then a symbol look up for immediate Value +/// is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with the immediate Value is created. This function returns true +/// if it adds an operand to the MCInst and false otherwise. +static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, + uint64_t Address, uint64_t Offset, + uint64_t Width, MCInst &MI, + const MCDisassembler *Dis) { + LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); + struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + SymbolicOp.Value = Value; + void *DisInfo = Dis->getDisInfoBlock(); + + if (!getOpInfo || + !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) { + // Clear SymbolicOp.Value from above and also all other fields. + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (!SymbolLookUp) + return false; + uint64_t ReferenceType; + if (isBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + } + // For branches always create an MCExpr so it gets printed as hex address.
+ else if (isBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + if (!Name && !isBranch) + return false; + } + + MCContext *Ctx = Dis->getMCContext(); + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + MI.addOperand(MCOperand::CreateExpr(Expr)); + + return true; +} + +/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is being +/// referenced by a load instruction whose base register is RIP. +/// These can often be addresses in a literal pool. The Address of the +/// instruction and its immediate Value are used to determine the address +/// being referenced in the literal pool entry. The SymbolLookUp callback will +/// return a pointer to a literal 'C' string if the referenced address is an +/// address into a section with 'C' string literals. +static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, + const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + void *DisInfo = Dis->getDisInfoBlock(); + uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; + const char *ReferenceName; + (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + (*Dis->CommentStream) << "literal pool for: " << ReferenceName; + } +} + /// translateImmediate - Appends an immediate operand to an MCInst. /// /// @param mcInst - The MCInst to append to. @@ -169,10 +316,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// @param insn - The internal instruction. static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { // Sign-extend the immediate if necessary.
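For context, the getOpInfo and SymbolLookUp hooks used by the two functions above come from the C disassembler API (llvm-c/Disassembler.h) and are supplied by the client. A minimal sketch of a client-side getOpInfo, with a made-up address and symbol name:

    // Hypothetical client callback; the signature and the LLVMOpInfo1/TagType
    // protocol are from llvm-c/Disassembler.h. Returns non-zero once TagBuf
    // has been filled in with symbolic information.
    static int myGetOpInfo(void *DisInfo, uint64_t PC, uint64_t Offset,
                           uint64_t Size, int TagType, void *TagBuf) {
      if (TagType != 1)                        // only the LLVMOpInfo1 layout
        return 0;
      LLVMOpInfo1 *Op = static_cast<LLVMOpInfo1 *>(TagBuf);
      if (PC + Offset == 0x100000f80ULL) {     // made-up relocation site
        Op->AddSymbol.Present = 1;
        Op->AddSymbol.Name = "_puts";          // made-up symbol
        Op->Value = 0;                         // no residual addend
        return 1;
      }
      return 0;                                // let SymbolLookUp try instead
    }

With AddSymbol filled in, tryAddingSymbolicOperand builds an MCSymbolRefExpr, so a call prints as a symbol rather than a raw address.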
- OperandType type = operand.type; + OperandType type = (OperandType)operand.type; if (type == TYPE_RELv) { switch (insn.displacementSize) { @@ -225,6 +373,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } + bool isBranch = false; + uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); @@ -232,8 +382,11 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_XMM256: mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); return; - case TYPE_MOFFS8: case TYPE_REL8: + isBranch = true; + pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; + // fall through to sign extend the immediate if needed. + case TYPE_MOFFS8: if(immediate & 0x80) immediate |= ~(0xffull); break; @@ -241,9 +394,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, if(immediate & 0x8000) immediate |= ~(0xffffull); break; - case TYPE_MOFFS32: case TYPE_REL32: case TYPE_REL64: + isBranch = true; + pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; + // fall through to sign extend the immediate if needed. + case TYPE_MOFFS32: if(immediate & 0x80000000) immediate |= ~(0xffffffffull); break; @@ -253,7 +409,10 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } - mcInst.addOperand(MCOperand::CreateImm(immediate)); + if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, + insn.immediateOffset, insn.immediateSize, + mcInst, Dis)) + mcInst.addOperand(MCOperand::CreateImm(immediate)); } /// translateRMRegister - Translates a register stored in the R/M field of the @@ -300,7 +459,8 @@ static bool translateRMRegister(MCInst &mcInst, /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. /// @return - 0 on success; nonzero otherwise -static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, + const MCDisassembler *Dis) { // Addresses in an MCInst are represented as five operands: // 1. 
basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -318,6 +478,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { MCOperand indexReg; MCOperand displacement; MCOperand segmentReg; + uint64_t pcrel = 0; if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { if (insn.sibBase != SIB_BASE_NONE) { @@ -359,8 +520,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); return true; } - if (insn.mode == MODE_64BIT) + if (insn.mode == MODE_64BIT){ + pcrel = insn.startLocation + + insn.displacementOffset + insn.displacementSize; + tryAddingPcLoadReferenceComment(insn.startLocation + + insn.displacementOffset, + insn.displacement + pcrel, Dis); baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 + } else baseReg = MCOperand::CreateReg(0); @@ -426,7 +593,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); - mcInst.addOperand(displacement); + if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, + insn.startLocation, insn.displacementOffset, + insn.displacementSize, mcInst, Dis)) + mcInst.addOperand(displacement); mcInst.addOperand(segmentReg); return false; } @@ -440,7 +610,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { /// from. /// @return - 0 on success; nonzero otherwise static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, const MCDisassembler *Dis) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -480,7 +650,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M1632: case TYPE_M1664: case TYPE_LEA: - return translateRMMemory(mcInst, insn); + return translateRMMemory(mcInst, insn, Dis); } } @@ -510,7 +680,8 @@ static bool translateFPRegister(MCInst &mcInst, /// @param insn - The internal instruction. /// @return - false on success; true otherwise. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); @@ -519,7 +690,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.reg); return false; case ENCODING_RM: - return translateRM(mcInst, operand, insn); + return translateRM(mcInst, operand, insn, Dis); case ENCODING_CB: case ENCODING_CW: case ENCODING_CD: @@ -537,7 +708,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateImmediate(mcInst, insn.immediates[insn.numImmediatesTranslated++], operand, - insn); + insn, + Dis); return false; case ENCODING_RB: case ENCODING_RW: @@ -556,7 +728,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_DUP: return translateOperand(mcInst, insn.spec->operands[operand.type - TYPE_DUP0], - insn); + insn, Dis); } } @@ -567,7 +739,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, /// @param insn - The internal instruction. /// @return - false on success; true otherwise. 
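A worked example of the RIP-relative bookkeeping in translateRMMemory above, with made-up addresses:

    // 0x1000: 48 8b 05 00 02 00 00    movq 0x200(%rip), %rax
    // displacementOffset = 3, displacementSize = 4, so:
    //   pcrel  = 0x1000 + 3 + 4 = 0x1007   // address of the next instruction
    //   target = 0x200 + pcrel  = 0x1207   // what the symbol lookup is given

tryAddingPcLoadReferenceComment receives that target, and when the lookup reports a C-string literal the instruction gets a "literal pool for:" comment in the verbose-assembly stream.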
static bool translateInstruction(MCInst &mcInst, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { if (!insn.spec) { debug("Instruction has no specification"); return true; @@ -581,7 +754,7 @@ static bool translateInstruction(MCInst &mcInst, for (index = 0; index < X86_MAX_OPERANDS; ++index) { if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn)) { + if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { return true; } } @@ -590,12 +763,16 @@ static bool translateInstruction(MCInst &mcInst, return false; } -static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { - return new X86Disassembler::X86_32Disassembler(STI); +static MCDisassembler *createX86_32Disassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, + T.createMCInstrInfo()); } -static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { - return new X86Disassembler::X86_64Disassembler(STI); +static MCDisassembler *createX86_64Disassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, + T.createMCInstrInfo()); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 6ac9a0f..c11f51c 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -1,4 +1,4 @@ -//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===// +//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -78,7 +78,7 @@ const char* name; #define INSTRUCTION_IDS \ - const InstrUID *instructionIDs; + unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -87,11 +87,10 @@ #include "llvm/MC/MCDisassembler.h" -struct InternalInstruction; - namespace llvm { class MCInst; +class MCInstrInfo; class MCSubtargetInfo; class MemoryObject; class raw_ostream; @@ -104,13 +103,16 @@ namespace X86Disassembler { /// All each platform class should have to do is subclass the constructor, and /// provide a different disassemblerMode value. class X86GenericDisassembler : public MCDisassembler { -protected: + const MCInstrInfo *MII; +public: /// Constructor - Initializes the disassembler. /// /// @param mode - The X86 architecture mode to decode for. - X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode); -public: + X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode, + const MCInstrInfo *MII); +private: ~X86GenericDisassembler(); +public: /// getInstruction - See MCDisassembler. DecodeStatus getInstruction(MCInst &instr, @@ -121,37 +123,13 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; -/// X86_16Disassembler - 16-bit X86 disassembler. -class X86_16Disassembler : public X86GenericDisassembler { -public: - X86_16Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_16BIT) { - } -}; - -/// X86_16Disassembler - 32-bit X86 disassembler. 
-class X86_32Disassembler : public X86GenericDisassembler { -public: - X86_32Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_32BIT) { - } -}; - -/// X86_16Disassembler - 64-bit X86 disassembler. -class X86_64Disassembler : public X86GenericDisassembler { -public: - X86_64Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_64BIT) { - } -}; - } // namespace X86Disassembler - + } // namespace llvm - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 1a24807..b0e66f0 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* +/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -82,11 +82,9 @@ static int modRMRequired(OpcodeType type, decision = &THREEBYTEA7_SYM; break; } - + return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. modrm_type != MODRM_ONEENTRY; - - return 0; } /* @@ -103,12 +101,9 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM) { - const struct ModRMDecision* dec; + const struct ModRMDecision* dec = 0; switch (type) { - default: - debug("Unknown opcode type"); - return 0; case ONEBYTE: dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; @@ -134,14 +129,17 @@ static InstrUID decode(OpcodeType type, debug("Corrupt table! Unknown modrm_type"); return 0; case MODRM_ONEENTRY: - return dec->instructionIDs[0]; + return modRMTable[dec->instructionIDs]; case MODRM_SPLITRM: if (modFromModRM(modRM) == 0x3) - return dec->instructionIDs[1]; - else - return dec->instructionIDs[0]; + return modRMTable[dec->instructionIDs+1]; + return modRMTable[dec->instructionIDs]; + case MODRM_SPLITREG: + if (modFromModRM(modRM) == 0x3) + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; case MODRM_FULL: - return dec->instructionIDs[modRM]; + return modRMTable[dec->instructionIDs+modRM]; } } @@ -712,7 +710,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. 
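A worked example for the flattened lookups in decode() above: a MODRM_SPLITREG group occupies sixteen consecutive modRMTable slots, the eight reg-field-indexed memory forms followed by the eight register forms. For modRM = 0xe3 (mod = 3, reg = 4) the slot is instructionIDs + ((0xe3 & 0x38) >> 3) + 8 = instructionIDs + 12; for modRM = 0x23 (same reg field, mod != 3) it is instructionIDs + 4. Storing an unsigned base index rather than a pointer is what lets every decision share the single modRMTable array.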
*/ -static int getID(struct InternalInstruction* insn) { +static int getID(struct InternalInstruction* insn, void *miiArg) { uint8_t attrMask; uint16_t instructionID; @@ -765,6 +763,8 @@ static int getID(struct InternalInstruction* insn) { else { if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + attrMask |= ATTR_ADSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) @@ -826,7 +826,7 @@ static int getID(struct InternalInstruction* insn) { const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; - const struct InstructionSpecifier *specWithOpsize; + const char *specName, *specWithOpSizeName; spec = specifierForUID(instructionID); @@ -843,11 +843,13 @@ static int getID(struct InternalInstruction* insn) { return 0; } - specWithOpsize = specifierForUID(instructionIDWithOpsize); - - if (is16BitEquvalent(spec->name, specWithOpsize->name)) { + specName = x86DisassemblerGetInstrName(instructionID, miiArg); + specWithOpSizeName = + x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); + + if (is16BitEquvalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; - insn->spec = specWithOpsize; + insn->spec = specifierForUID(instructionIDWithOpsize); } else { insn->instructionID = instructionID; insn->spec = spec; @@ -1014,6 +1016,7 @@ static int readDisplacement(struct InternalInstruction* insn) { return 0; insn->consumedDisplacement = TRUE; + insn->displacementOffset = insn->readerCursor - insn->startLocation; switch (insn->eaDisplacement) { case EA_DISP_NONE: @@ -1410,6 +1413,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { size = insn->immediateSize; else insn->immediateSize = size; + insn->immediateOffset = insn->readerCursor - insn->startLocation; switch (size) { case 1: @@ -1472,6 +1476,7 @@ static int readVVVV(struct InternalInstruction* insn) { static int readOperands(struct InternalInstruction* insn) { int index; int hasVVVV, needVVVV; + int sawRegImm = 0; dbgprintf(insn, "readOperands()"); @@ -1500,11 +1505,22 @@ static int readOperands(struct InternalInstruction* insn) { dbgprintf(insn, "We currently don't hande code-offset encodings"); return -1; case ENCODING_IB: + if (sawRegImm) { + /* Saw a register immediate so don't read again and instead split the + previous immediate. FIXME: This is a hack. 
*/ + insn->immediates[insn->numImmediatesConsumed] = + insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; + ++insn->numImmediatesConsumed; + break; + } if (readImmediate(insn, 1)) return -1; if (insn->spec->operands[index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; + if (insn->spec->operands[index].type == TYPE_XMM128 || + insn->spec->operands[index].type == TYPE_XMM256) + sawRegImm = 1; break; case ENCODING_IW: if (readImmediate(insn, 2)) @@ -1596,6 +1612,7 @@ int decodeInstruction(struct InternalInstruction* insn, void* readerArg, dlog_t logger, void* loggerArg, + void* miiArg, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); @@ -1611,7 +1628,7 @@ int decodeInstruction(struct InternalInstruction* insn, if (readPrefixes(insn) || readOpcode(insn) || - getID(insn) || + getID(insn, miiArg) || insn->instructionID == 0 || readOperands(insn)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index a9c90f8..fae309b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==* +/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -20,11 +20,10 @@ extern "C" { #endif -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS #define INSTRUCTION_IDS \ - const InstrUID *instructionIDs; + unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -460,6 +459,11 @@ struct InternalInstruction { uint8_t addressSize; uint8_t displacementSize; uint8_t immediateSize; + + /* Offsets from the start of the instruction to the pieces of data, which is + needed to find relocation entries for adding symbolic operands */ + uint8_t displacementOffset; + uint8_t immediateOffset; /* opcode state */ @@ -554,6 +558,7 @@ int decodeInstruction(struct InternalInstruction* insn, void* readerArg, dlog_t logger, void* loggerArg, + void* miiArg, uint64_t startLoc, DisassemblerMode mode); @@ -568,6 +573,8 @@ void x86DisassemblerDebug(const char *file, unsigned line, const char *s); +const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); + #ifdef __cplusplus } #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index a7ef0cc..d2e30f1 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==* +/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -54,8 +54,9 @@ ENUM_ENTRY(ATTR_XD, 0x04) \ ENUM_ENTRY(ATTR_REXW, 0x08) \ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ - ENUM_ENTRY(ATTR_VEX, 0x20) \ - ENUM_ENTRY(ATTR_VEXL, 0x40) + ENUM_ENTRY(ATTR_ADSIZE, 0x20) \ + ENUM_ENTRY(ATTR_VEX, 0x40) \ + ENUM_ENTRY(ATTR_VEXL, 0x80) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -77,6 +78,8 @@ enum attributeBits { "64-bit mode but no more") \ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \ "operands change width") \ + ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \ + "operands change width") \ ENUM_ENTRY(IC_XD, 2, "may say something about 
the opcode " \ "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ @@ -88,6 +91,7 @@ enum attributeBits { ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ "secondary") \ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ @@ -156,6 +160,8 @@ typedef uint16_t InstrUID; * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode * corresponds to one instruction; otherwise, it corresponds to * a different instruction. + * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This + corresponds to instructions that use reg field as opcode * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond * to a different instruction. */ @@ -163,6 +169,7 @@ typedef uint16_t InstrUID; #define MODRMTYPES \ ENUM_ENTRY(MODRM_ONEENTRY) \ ENUM_ENTRY(MODRM_SPLITRM) \ + ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) #define ENUM_ENTRY(n) n, @@ -336,8 +343,8 @@ typedef enum { * operand. */ struct OperandSpecifier { - OperandEncoding encoding; - OperandType type; + uint8_t encoding; + uint8_t type; }; /* @@ -364,7 +371,7 @@ typedef enum { * its operands. */ struct InstructionSpecifier { - ModifierType modifierType; + uint8_t modifierType; uint8_t modifierBase; struct OperandSpecifier operands[X86_MAX_OPERANDS]; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 8f26d9f..b7ccb4c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -30,10 +30,6 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) { -} - void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << '%' << getRegisterName(RegNo); @@ -45,11 +41,12 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, if (!printAliasInstr(MI, OS)) printInstruction(MI, OS); + // Next always print the annotation. + printAnnotation(OS, Annot); + // If verbose assembly is enabled, we can print some informative comments. 
- if (CommentStream) { - printAnnotation(OS, Annot); + if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); - } } StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { @@ -59,15 +56,39 @@ StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + default: llvm_unreachable("Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; } } @@ -79,11 +100,21 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) - // Print this as a signed 32-bit value. - O << (int)Op.getImm(); + O << Op.getImm(); else { assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); + // If a symbolic branch target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. 
+ O << *Op.getExpr(); + } } } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 0293869..ff94301 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -1,4 +1,4 @@ -//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===// +//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -22,8 +22,9 @@ class MCOperand; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI); - + X86ATTInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MRI) {} + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 6e4b1b9..30a847f 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -76,10 +76,19 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSHUFDmi: case X86::VPSHUFDmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFMask(MVT::v4i32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPSHUFDYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPSHUFDYmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFMask(MVT::v8i32, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; + case X86::PSHUFHWri: case X86::VPSHUFHWri: Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -437,31 +446,31 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPSmi: - DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePSHUFMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPSYmi: - DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePSHUFMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPDmi: - DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); + DecodePSHUFMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::VPERMILPDYmi: - DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -471,7 +480,9 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. 
case X86::VPERM2F128rm: case X86::VPERM2I128rm: - DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(), + // For instruction comments purpose, assume the 256-bit vector is v4i64. + DecodeVPERM2X128Mask(MVT::v4i64, + MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h index 6b86db4..13fdf9a 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.h +++ b/lib/Target/X86/InstPrinter/X86InstComments.h @@ -1,4 +1,4 @@ -//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===// +//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index f9ab5ae..46a96d2 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -35,12 +35,13 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) { printInstruction(MI, OS); - + + // Next always print the annotation. + printAnnotation(OS, Annot); + // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) { - printAnnotation(OS, Annot); + if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); - } } StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); @@ -49,15 +50,40 @@ StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + default: llvm_unreachable("Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; + } } @@ -70,7 +96,18 @@ void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, O << Op.getImm(); else { assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); + // If a symbolic branch 
target was added as a constant expression then print + // that address in hex. + const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); + int64_t Address; + if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { + O << "0x"; + O.write_hex(Address); + } + else { + // Otherwise, just print the expression. + O << *Op.getExpr(); + } } } diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 6d5ec62..ea1d38a 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -1,4 +1,4 @@ -//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===// +//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -23,8 +23,8 @@ class MCOperand; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} + X86IntelInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index ab2ebb4..1c240e5 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -2,8 +2,10 @@ add_llvm_library(LLVMX86Desc X86AsmBackend.cpp X86MCTargetDesc.cpp X86MCAsmInfo.cpp - X86MCCodeEmitter.cpp + X86MCCodeEmitter.cpp X86MachObjectWriter.cpp + X86ELFObjectWriter.cpp + X86WinCOFFObjectWriter.cpp ) add_dependencies(LLVMX86Desc X86CommonTableGen) diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 87b2b05..9ccbf1c 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -37,18 +37,22 @@ MCDisableArithRelaxation("mc-x86-disable-arith-relaxation", static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { - default: assert(0 && "invalid fixup kind!"); + default: llvm_unreachable("invalid fixup kind!"); case FK_PCRel_1: + case FK_SecRel_1: case FK_Data_1: return 0; case FK_PCRel_2: + case FK_SecRel_2: case FK_Data_2: return 1; case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: case X86::reloc_signed_4byte: case X86::reloc_global_offset_table: + case FK_SecRel_4: case FK_Data_4: return 2; case FK_PCRel_8: + case FK_SecRel_8: case FK_Data_8: return 3; } } @@ -57,9 +61,9 @@ namespace { class X86ELFObjectWriter : public MCELFObjectTargetWriter { public: - X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, - bool HasRelocationAddend) - : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {} + X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, + bool HasRelocationAddend, bool foobar) + : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {} }; class X86AsmBackend : public MCAsmBackend { @@ -87,7 +91,7 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); @@ -105,16 +109,16 @@ public: Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } - bool 
MayNeedRelaxation(const MCInst &Inst) const; + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCInstFragment *DF, const MCAsmLayout &Layout) const; - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; }; } // end anonymous namespace @@ -219,7 +223,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { return getRelaxedOpcodeBranch(Op); } -bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { +bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { // Branches can always be relaxed. if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) return true; @@ -259,7 +263,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { +void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); @@ -275,10 +279,10 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -/// WriteNopData - Write optimal nops to the output file for the \arg Count +/// writeNopData - Write optimal nops to the output file for the \arg Count /// bytes. This returns the number of bytes written. It may return 0 if /// the \arg Count is more than the maximum optimal nops. -bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { +bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { static const uint8_t Nops[10][10] = { // nop {0x90}, @@ -323,9 +327,9 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { namespace { class ELFX86AsmBackend : public X86AsmBackend { public: - Triple::OSType OSType; - ELFX86AsmBackend(const Target &T, Triple::OSType _OSType) - : X86AsmBackend(T), OSType(_OSType) { + uint8_t OSABI; + ELFX86AsmBackend(const Target &T, uint8_t _OSABI) + : X86AsmBackend(T), OSABI(_OSABI) { HasReliableSymbolDifference = true; } @@ -337,31 +341,21 @@ public: class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType) - : ELFX86AsmBackend(T, OSType) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(createELFObjectTargetWriter(), - OS, /*IsLittleEndian*/ true); - } - - MCELFObjectTargetWriter *createELFObjectTargetWriter() const { - return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false); + return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI); } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType) - : ELFX86AsmBackend(T, OSType) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI) + : ELFX86AsmBackend(T, OSABI) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(createELFObjectTargetWriter(), - OS, /*IsLittleEndian*/ true); - } - - MCELFObjectTargetWriter *createELFObjectTargetWriter() const { - return new X86ELFObjectWriter(true, OSType, 
ELF::EM_X86_64, true); + return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI); } }; @@ -375,7 +369,7 @@ public: } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createWinCOFFObjectWriter(OS, Is64Bit); + return createX86WinCOFFObjectWriter(OS, Is64Bit); } }; @@ -455,7 +449,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) return new WindowsX86AsmBackend(T, false); - return new ELFX86_32AsmBackend(T, TheTriple.getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + return new ELFX86_32AsmBackend(T, OSABI); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { @@ -467,5 +462,6 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) return new WindowsX86AsmBackend(T, true); - return new ELFX86_64AsmBackend(T, TheTriple.getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + return new ELFX86_64AsmBackend(T, OSABI); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 662ac1d..a0bb6dc 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -19,7 +19,7 @@ #include "X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" -#include <cassert> +#include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -164,7 +164,13 @@ namespace X86II { /// is some TLS offset from the picbase. /// /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. - MO_TLVP_PIC_BASE + MO_TLVP_PIC_BASE, + + /// MO_SECREL - On a symbol operand this indicates that the immediate is + /// the offset from beginning of section. + /// + /// This is the TLS offset for the COFF/Windows TLS mechanism. + MO_SECREL }; enum { @@ -223,19 +229,13 @@ namespace X86II { // destinations are the same register. MRMInitReg = 32, - //// MRM_C1 - A mod/rm byte of exactly 0xC1. - MRM_C1 = 33, - MRM_C2 = 34, - MRM_C3 = 35, - MRM_C4 = 36, - MRM_C8 = 37, - MRM_C9 = 38, - MRM_E8 = 39, - MRM_F0 = 40, - MRM_F8 = 41, - MRM_F9 = 42, - MRM_D0 = 45, - MRM_D1 = 46, + //// MRM_XX - A mod/rm byte of exactly 0xXX. + MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, + MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, + MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, + MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50, + MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54, + MRM_DF = 55, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -426,10 +426,9 @@ namespace X86II { /// this flag to indicate that the encoder should do the wacky 3DNow! thing. Has3DNow0F0FOpcode = 1U << 7, - /// XOP_W - Same bit as VEX_W. Used to indicate swapping of - /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used - /// for FMA4 and XOP instructions. - XOP_W = 1U << 8, + /// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in + /// ModRM or I8IMM. This is used for FMA4 and XOP instructions. + MemOp4 = 1U << 8, /// XOP - Opcode prefix used by XOP instructions. XOP = 1U << 9 @@ -451,7 +450,7 @@ namespace X86II { /// of the specified instruction. 
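// The backend constructors above now thread a raw ELF OSABI byte (e_ident's
// EI_OSABI field) instead of a Triple::OSType, so the object writer no
// longer needs to understand triples. A hedged sketch of the kind of mapping
// MCELFObjectTargetWriter::getOSABI performs; the cases shown are
// illustrative, not the complete table:
static uint8_t osABIFor(Triple::OSType OS) {
  switch (OS) {
  case Triple::FreeBSD:
    return ELF::ELFOSABI_FREEBSD;
  case Triple::Linux:
    return ELF::ELFOSABI_LINUX;
  default:
    return ELF::ELFOSABI_NONE; // plain SysV-style ELF
  }
}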
static inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); + default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8: case X86II::Imm8PCRel: return 1; case X86II::Imm16: @@ -466,7 +465,7 @@ namespace X86II { /// TSFlags indicates that it is pc relative. static inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); + default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8PCRel: case X86II::Imm16PCRel: case X86II::Imm32PCRel: @@ -489,8 +488,8 @@ namespace X86II { /// static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); - default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); + case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this form"); + default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!"); case X86II::Pseudo: case X86II::RawFrm: case X86II::AddRegFrm: @@ -503,11 +502,11 @@ namespace X86II { return 0; case X86II::MRMSrcMem: { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; - bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). - if (HasXOP_W) + if (HasMemOp4) ++FirstMemOp;// Skip the register source (which is encoded in I8IMM). // FIXME: Maybe lea should have its own form? This is a horrible hack. @@ -530,18 +529,17 @@ namespace X86II { ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV). return FirstMemOp; } - case X86II::MRM_C1: - case X86II::MRM_C2: - case X86II::MRM_C3: - case X86II::MRM_C4: - case X86II::MRM_C8: - case X86II::MRM_C9: - case X86II::MRM_E8: - case X86II::MRM_F0: - case X86II::MRM_F8: - case X86II::MRM_F9: - case X86II::MRM_D0: - case X86II::MRM_D1: + case X86II::MRM_C1: case X86II::MRM_C2: + case X86II::MRM_C3: case X86II::MRM_C4: + case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_E8: case X86II::MRM_F0: + case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: return -1; } } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp new file mode 100644 index 0000000..5a42a80 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -0,0 +1,224 @@ +//===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + class X86ELFObjectWriter : public MCELFObjectTargetWriter { + public: + X86ELFObjectWriter(bool is64Bit, uint8_t OSABI); + + virtual ~X86ELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + }; +} + +X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, + Is64Bit ? ELF::EM_X86_64 : ELF::EM_386, + /*HasRelocationAddend*/ Is64Bit) {} + +X86ELFObjectWriter::~X86ELFObjectWriter() +{} + +unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + // determine the type of the relocation + + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + unsigned Type; + if (is64Bit()) { + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + + case FK_Data_8: Type = ELF::R_X86_64_PC64; break; + case FK_Data_4: Type = ELF::R_X86_64_PC32; break; + case FK_Data_2: Type = ELF::R_X86_64_PC16; break; + + case FK_PCRel_8: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC64; + break; + case X86::reloc_signed_4byte: + case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_riprel_4byte: + case FK_PCRel_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_X86_64_PC32; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_X86_64_PLT32; + break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_X86_64_GOTPCREL; + break; + case MCSymbolRefExpr::VK_GOTTPOFF: + Type = ELF::R_X86_64_GOTTPOFF; + break; + case MCSymbolRefExpr::VK_TLSGD: + Type = ELF::R_X86_64_TLSGD; + break; + case MCSymbolRefExpr::VK_TLSLD: + Type = ELF::R_X86_64_TLSLD; + break; + } + break; + case FK_PCRel_2: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC16; + break; + case FK_PCRel_1: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC8; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_8: Type = ELF::R_X86_64_64; break; + case X86::reloc_signed_4byte: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_X86_64_32S; + break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_X86_64_GOT32; + break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_X86_64_GOTPCREL; + break; + case MCSymbolRefExpr::VK_TPOFF: + Type = ELF::R_X86_64_TPOFF32; + break; + case MCSymbolRefExpr::VK_DTPOFF: + Type = ELF::R_X86_64_DTPOFF32; + break; + } + break; + case FK_Data_4: + Type = ELF::R_X86_64_32; + break; + case FK_Data_2: Type = ELF::R_X86_64_16; break; + case FK_PCRel_1: + case FK_Data_1: Type = ELF::R_X86_64_8; break; + } + } + } else { + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + + case X86::reloc_global_offset_table: + Type = 
ELF::R_386_GOTPC; + break; + + case X86::reloc_signed_4byte: + case FK_PCRel_4: + case FK_Data_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_386_PC32; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_386_PLT32; + break; + } + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + + case X86::reloc_global_offset_table: + Type = ELF::R_386_GOTPC; + break; + + // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode + // instead? + case X86::reloc_signed_4byte: + case FK_PCRel_4: + case FK_Data_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_386_32; + break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_386_GOT32; + break; + case MCSymbolRefExpr::VK_GOTOFF: + Type = ELF::R_386_GOTOFF; + break; + case MCSymbolRefExpr::VK_TLSGD: + Type = ELF::R_386_TLS_GD; + break; + case MCSymbolRefExpr::VK_TPOFF: + Type = ELF::R_386_TLS_LE_32; + break; + case MCSymbolRefExpr::VK_INDNTPOFF: + Type = ELF::R_386_TLS_IE; + break; + case MCSymbolRefExpr::VK_NTPOFF: + Type = ELF::R_386_TLS_LE; + break; + case MCSymbolRefExpr::VK_GOTNTPOFF: + Type = ELF::R_386_TLS_GOTIE; + break; + case MCSymbolRefExpr::VK_TLSLDM: + Type = ELF::R_386_TLS_LDM; + break; + case MCSymbolRefExpr::VK_DTPOFF: + Type = ELF::R_386_TLS_LDO_32; + break; + case MCSymbolRefExpr::VK_GOTTPOFF: + Type = ELF::R_386_TLS_IE_32; + break; + } + break; + case FK_Data_2: Type = ELF::R_386_16; break; + case FK_PCRel_1: + case FK_Data_1: Type = ELF::R_386_8; break; + } + } + } + + return Type; +} + +MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = + new X86ELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index 17d242a..f2e34cb 100644 --- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h +++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h @@ -1,4 +1,4 @@ -//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +//===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index eb64ad1..003a14a 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -48,6 +48,8 @@ static const char *const x86_asm_table[] = { "{cc}", "cc", 0,0}; +void X86MCAsmInfoDarwin::anchor() { } + X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; if (is64Bit) @@ -80,6 +82,8 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) : X86MCAsmInfoDarwin(Triple) { } +void X86ELFMCAsmInfo::anchor() { } + X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { if (T.getArch() == Triple::x86_64) PointerSize = 8; @@ -125,6 +129,8 @@ getNonexecutableStackSection(MCContext &Ctx) const { 0, SectionKind::getMetadata()); } +void X86MCAsmInfoMicrosoft::anchor() { } + X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; @@ -137,6 +143,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { TextAlignFillValue = 0x90; } +void 
X86MCAsmInfoGNUCOFF::anchor() { } + X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 5d619e8..b6b70fd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====// +//===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -21,7 +21,9 @@ namespace llvm { class Triple; - struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + class X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit X86MCAsmInfoDarwin(const Triple &Triple); }; @@ -33,16 +35,22 @@ namespace llvm { MCStreamer &Streamer) const; }; - struct X86ELFMCAsmInfo : public MCAsmInfo { + class X86ELFMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit X86ELFMCAsmInfo(const Triple &Triple); virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; - struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + virtual void anchor(); + public: explicit X86MCAsmInfoMicrosoft(const Triple &Triple); }; - struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + virtual void anchor(); + public: explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); }; } // namespace llvm diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 8e14cb1..37727b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===// +//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===// // // The LLVM Compiler Infrastructure // @@ -85,7 +85,7 @@ public: } } - void EmitImmediate(const MCOperand &Disp, + void EmitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, @@ -202,8 +202,8 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) { } void X86MCCodeEmitter:: -EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, - unsigned &CurByte, raw_ostream &OS, +EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, + MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { const MCExpr *Expr = NULL; if (DispOp.isImm()) { @@ -222,6 +222,7 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If we have an immoffset, add it to the expression. 
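// The anchor() methods added to the MCAsmInfo subclasses above follow a
// common LLVM idiom: giving a class one out-of-line virtual function pins
// its vtable (and type info) to a single object file, instead of weak
// copies being emitted by every translation unit that includes the header.
// A minimal illustration of the pattern:
class Pinned {
  virtual void anchor(); // declared in the header ...
public:
  virtual ~Pinned() {}
};
void Pinned::anchor() {} // ... defined once, in exactly one .cpp file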
if ((FixupKind == FK_Data_4 || + FixupKind == FK_Data_8 || FixupKind == MCFixupKind(X86::reloc_signed_4byte))) { GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr); if (Kind != GOT_None) { @@ -230,6 +231,11 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, FixupKind = MCFixupKind(X86::reloc_global_offset_table); if (Kind == GOT_Normal) ImmOffset = CurByte; + } else if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) { + FixupKind = MCFixupKind(FK_SecRel_4); + } } } @@ -249,7 +255,7 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind, Loc)); EmitConstant(0, Size, CurByte, OS); } @@ -285,7 +291,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - EmitImmediate(Disp, 4, MCFixupKind(FixupKind), + EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } @@ -309,7 +315,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + EmitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups); return; } @@ -325,13 +331,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); return; } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, + EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, Fixups); return; } @@ -390,10 +396,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, - Fixups); + EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), + CurByte, OS, Fixups); } /// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix @@ -431,10 +437,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // opcode extension, or ignored, depending on the opcode byte) unsigned char VEX_W = 0; - // XOP_W: opcode specific, same bit as VEX_W, but used to - // swap operand 3 and 4 for FMA4 and XOP instructions - unsigned char XOP_W = 0; - // XOP: Use XOP prefix byte 0x8f instead of VEX.
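// Two threads run through the EmitImmediate changes above: a source
// location (SMLoc) now rides along on every fixup so later diagnostics can
// point at the offending instruction, and symbol references carrying the
// @SECREL modifier become FK_SecRel_4 fixups for the Windows TLS scheme. A
// hedged sketch of the detection pattern, mirroring the hunk above:
if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr))
  if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL)
    FixupKind = MCFixupKind(FK_SecRel_4); // resolved by the COFF writer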
unsigned char XOP = 0; @@ -477,9 +479,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; - if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W) - XOP_W = 1; - if ((TSFlags >> X86II::VEXShift) & X86II::XOP) XOP = 1; @@ -487,7 +486,7 @@ VEX_L = 1; switch (TSFlags & X86II::Op0Mask) { - default: assert(0 && "Invalid prefix!"); + default: llvm_unreachable("Invalid prefix!"); case X86II::T8: // 0F 38 VEX_5M = 0x2; break; @@ -538,7 +537,7 @@ // Classify VEX_B, VEX_4V, VEX_R, VEX_X unsigned CurOp = 0; switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) @@ -669,7 +668,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 3 byte VEX prefix EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS); EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); - EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS); + EmitByte(LastByte | (VEX_W << 7), CurByte, OS); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with an X86-64 @@ -702,7 +701,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) @@ -772,12 +771,12 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, const MCInst &MI, raw_ostream &OS) const { switch (TSFlags & X86II::SegOvrMask) { - default: assert(0 && "Invalid segment!"); + default: llvm_unreachable("Invalid segment!"); case 0: // No segment override, check for explicit one on memory operand. if (MemOperand != -1) { // If the instruction has a memory operand. switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { - default: assert(0 && "Unknown segment register!"); + default: llvm_unreachable("Unknown segment register!"); case 0: break; case X86::CS: EmitByte(0x2E, CurByte, OS); break; case X86::SS: EmitByte(0x36, CurByte, OS); break; @@ -828,7 +827,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { - default: assert(0 && "Invalid prefix!"); + default: llvm_unreachable("Invalid prefix!"); case 0: break; // No prefix! case X86II::REP: break; // already handled. case X86II::TB: // Two-byte opcode prefix @@ -929,8 +928,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; - bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; - unsigned XOP_W_I8IMMOperand = 2; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + const unsigned MemOp4_I8IMMOperand = 2; // Determine where the memory operand starts, if present.
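// With XOP_W folded into VEX_W above, the 3-byte prefix assembled in
// EmitVEXOpcodePrefix has this layout (a sketch of the byte packing; the
// field values are computed earlier in that function):
uint8_t P0 = XOP ? 0x8F : 0xC4;                             // escape byte
uint8_t P1 = VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M; // inverted R/X/B plus opcode map
uint8_t P2 = VEX_W << 7 | LastByte;                         // W plus inverted vvvv, L, pp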
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); @@ -949,27 +948,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: - assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); + llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; - assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: - assert(0 && "Pseudo instruction shouldn't be emitted"); + llvm_unreachable("Pseudo instruction shouldn't be emitted"); case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; case X86II::RawFrmImm8: EmitByte(BaseOpcode, CurByte, OS); - EmitImmediate(MI.getOperand(CurOp++), + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), CurByte, OS, Fixups); - EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte, + OS, Fixups); break; case X86II::RawFrmImm16: EmitByte(BaseOpcode, CurByte, OS); - EmitImmediate(MI.getOperand(CurOp++), + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), CurByte, OS, Fixups); - EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte, + OS, Fixups); break; case X86II::AddRegFrm: @@ -1003,14 +1004,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) SrcRegNum++; - if(HasXOP_W) // Skip 2nd src (which is encoded in I8IMM) + if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) SrcRegNum++; EmitRegModRMByte(MI.getOperand(SrcRegNum), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); - // 2 operands skipped with HasXOP_W, comensate accordingly - CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1; + // 2 operands skipped with HasMemOp4, compensate accordingly + CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; break; @@ -1022,7 +1023,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
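// MemOp4 (the renamed XOP_W bit) marks FMA4/XOP forms whose extra register
// source travels in the trailing 8-bit immediate rather than in ModRM,
// which is why one more operand is skipped above. A hedged sketch of how
// that register is packed, as the VEX_I8IMM path later in this function
// does: the 4-bit register number occupies imm8[7:4], with the top bit
// doubling as the extended-register (xmm8-xmm15) flag.
unsigned char ImmByte = GetX86RegNum(MO) << 4; // low 3 bits of the reg num
if (X86II::isX86_64ExtendedReg(MO.getReg()))
  ImmByte |= 1 << 7;                           // the extended-half bit
EmitByte(ImmByte, CurByte, OS);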
} - if(HasXOP_W) // Skip second register source (encoded in I8IMM) + if(HasMemOp4) // Skip second register source (encoded in I8IMM) ++FirstMemOp; EmitByte(BaseOpcode, CurByte, OS); @@ -1057,53 +1058,45 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, TSFlags, CurByte, OS, Fixups); CurOp += X86::AddrNumOperands; break; - case X86II::MRM_C1: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC1, CurByte, OS); - break; - case X86II::MRM_C2: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC2, CurByte, OS); - break; - case X86II::MRM_C3: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC3, CurByte, OS); - break; - case X86II::MRM_C4: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC4, CurByte, OS); - break; - case X86II::MRM_C8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC8, CurByte, OS); - break; - case X86II::MRM_C9: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC9, CurByte, OS); - break; - case X86II::MRM_E8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xE8, CurByte, OS); - break; - case X86II::MRM_F0: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF0, CurByte, OS); - break; - case X86II::MRM_F8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF8, CurByte, OS); - break; + case X86II::MRM_C1: case X86II::MRM_C2: + case X86II::MRM_C3: case X86II::MRM_C4: + case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: case X86II::MRM_E8: + case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF9, CurByte, OS); - break; - case X86II::MRM_D0: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xD0, CurByte, OS); - break; - case X86II::MRM_D1: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xD1, CurByte, OS); + + unsigned char MRM; + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Invalid Form"); + case X86II::MRM_C1: MRM = 0xC1; break; + case X86II::MRM_C2: MRM = 0xC2; break; + case X86II::MRM_C3: MRM = 0xC3; break; + case X86II::MRM_C4: MRM = 0xC4; break; + case X86II::MRM_C8: MRM = 0xC8; break; + case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_D0: MRM = 0xD0; break; + case X86II::MRM_D1: MRM = 0xD1; break; + case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D8: MRM = 0xD8; break; + case X86II::MRM_D9: MRM = 0xD9; break; + case X86II::MRM_DA: MRM = 0xDA; break; + case X86II::MRM_DB: MRM = 0xDB; break; + case X86II::MRM_DC: MRM = 0xDC; break; + case X86II::MRM_DD: MRM = 0xDD; break; + case X86II::MRM_DE: MRM = 0xDE; break; + case X86II::MRM_DF: MRM = 0xDF; break; + case X86II::MRM_E8: MRM = 0xE8; break; + case X86II::MRM_F0: MRM = 0xF0; break; + case X86II::MRM_F8: MRM = 0xF8; break; + case X86II::MRM_F9: MRM = 0xF9; break; + } + EmitByte(MRM, CurByte, OS); break; } @@ -1113,7 +1106,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte. if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { - const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand + const MCOperand &MO = MI.getOperand(HasMemOp4 ? 
MemOp4_I8IMMOperand : CurOp); CurOp++; bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); @@ -1129,8 +1122,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, RegNum |= Val; } } - EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, - Fixups); + EmitImmediate(MCOperand::CreateImm(RegNum), MI.getLoc(), 1, FK_Data_1, + CurByte, OS, Fixups); } else { unsigned FixupKind; // FIXME: Is there a better way to know that we need a signed relocation? @@ -1141,7 +1134,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, FixupKind = X86::reloc_signed_4byte; else FixupKind = getImmFixupKind(TSFlags); - EmitImmediate(MI.getOperand(CurOp++), + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind), CurByte, OS, Fixups); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index f2a34ed..efd18c7 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===// +//===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Host.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_REGINFO_MC_DESC @@ -35,6 +36,10 @@ #define GET_SUBTARGETINFO_MC_DESC #include "X86GenSubtargetInfo.inc" +#if _MSC_VER +#include <intrin.h> +#endif + using namespace llvm; @@ -72,6 +77,8 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, *rECX = registers[2]; *rEDX = registers[3]; return false; + #else + return true; #endif #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) #if defined(__GNUC__) @@ -98,9 +105,12 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, mov dword ptr [esi],edx } return false; + #else + return true; #endif -#endif +#else return true; +#endif } /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the @@ -131,7 +141,11 @@ bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, *rECX = registers[2]; *rEDX = registers[3]; return false; + #else + return true; #endif + #else + return true; #endif #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) #if defined(__GNUC__) @@ -160,9 +174,12 @@ bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, mov dword ptr [esi],edx } return false; + #else + return true; #endif -#endif +#else return true; +#endif } void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, @@ -319,7 +336,8 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, std::string CPUName = CPU; if (CPUName.empty()) { -#if defined (__x86_64__) || defined(__i386__) +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) CPUName = sys::getHostCPUName(); #else CPUName = "generic"; @@ -456,11 +474,12 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createX86MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI); + return new X86ATTInstPrinter(MAI, MRI); if 
(SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI); + return new X86IntelInstPrinter(MAI, MRI); return 0; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index a4e0b5a..9896cbe 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -88,6 +88,12 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); +/// createX86ELFObjectWriter - Construct an X86 ELF object writer. +MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI); +/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. +MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp new file mode 100644 index 0000000..bc272ef --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -0,0 +1,65 @@ +//===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace llvm { + class MCObjectWriter; +} + +namespace { + class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { + const bool Is64Bit; + + public: + X86WinCOFFObjectWriter(bool Is64Bit_); + ~X86WinCOFFObjectWriter(); + + virtual unsigned getRelocType(unsigned FixupKind) const; + }; +} + +X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_) + : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 : + COFF::IMAGE_FILE_MACHINE_I386), + Is64Bit(Is64Bit_) {} + +X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {} + +unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { + switch (FixupKind) { + case FK_PCRel_4: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32; + case FK_Data_4: + case X86::reloc_signed_4byte: + return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32; + case FK_Data_8: + if (Is64Bit) + return COFF::IMAGE_REL_AMD64_ADDR64; + llvm_unreachable("unsupported relocation type"); + case FK_SecRel_4: + return Is64Bit ? COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL; + default: + llvm_unreachable("unsupported relocation type"); + } +} + +MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS, + bool Is64Bit) { + MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit); + return createWinCOFFObjectWriter(MOTW, OS); +} diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index b407955..f9c1d35 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -56,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces. Some isel ideas: -1. Dynamic programming based approach when compile time if not an +1. Dynamic programming based approach when compile time is not an issue. 2. Code duplication (addressing mode) during isel. 3. 
Other ideas from "Register-Sensitive Selection, Duplication, and @@ -2060,35 +2060,3 @@ Instead we could generate: The trick is to match "fetch_and_add(X, -C) == C". //===---------------------------------------------------------------------===// - -unsigned log2(unsigned x) { - return x > 1 ? 32-__builtin_clz(x-1) : 0; -} - -generates (x86_64): - xorl %eax, %eax - cmpl $2, %edi - jb LBB0_2 -## BB#1: - decl %edi - movl $63, %ecx - bsrl %edi, %eax - cmovel %ecx, %eax - xorl $-32, %eax - addl $33, %eax -LBB0_2: - ret - -The cmov and the early test are redundant: - xorl %eax, %eax - cmpl $2, %edi - jb LBB0_2 -## BB#1: - decl %edi - bsrl %edi, %eax - xorl $-32, %eax - addl $33, %eax -LBB0_2: - ret - -//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index e7631b6..f4b85ae 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -63,11 +63,23 @@ void DecodeMOVLHPSMask(unsigned NElts, ShuffleMask.push_back(NElts+i); } -void DecodePSHUFMask(unsigned NElts, unsigned Imm, +/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts; ++i) { - ShuffleMask.push_back(Imm % NElts); - Imm /= NElts; + unsigned NumElts = VT.getVectorNumElements(); + + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + int NewImm = Imm; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + l); + NewImm /= NumLaneElts; + } + if (NumLaneElts == 4) NewImm = Imm; // reload imm } } @@ -95,6 +107,9 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } +/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates +/// the type of the vector allowing it to handle different datatypes and vector +/// widths. void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); @@ -103,22 +118,24 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, unsigned NumLaneElts = NumElts / NumLanes; int NewImm = Imm; - for (unsigned l = 0; l < NumLanes; ++l) { - unsigned LaneStart = l * NumLaneElts; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { // Part that reads from dest. for (unsigned i = 0; i != NumLaneElts/2; ++i) { - ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart); + ShuffleMask.push_back(NewImm % NumLaneElts + l); NewImm /= NumLaneElts; } // Part that reads from src. for (unsigned i = 0; i != NumLaneElts/2; ++i) { - ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart); + ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + l); NewImm /= NumLaneElts; } if (NumLaneElts == 4) NewImm = Imm; // reload imm } } +/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// and punpckh*. VT indicates the type of the vector allowing it to handle +/// different datatypes and vector widths. 
void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); @@ -128,10 +145,8 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - for (unsigned s = 0; s < NumLanes; ++s) { - unsigned Start = s * NumLaneElts + NumLaneElts/2; - unsigned End = s * NumLaneElts + NumLaneElts; - for (unsigned i = Start; i != End; ++i) { + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { ShuffleMask.push_back(i); // Reads from dest/src1 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 } @@ -139,8 +154,8 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. VT indicates the type of the vector allowing it to handle different -/// datatypes and vector widths. +/// and punpckl*. VT indicates the type of the vector allowing it to handle +/// different datatypes and vector widths. void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); @@ -150,38 +165,15 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - for (unsigned s = 0; s < NumLanes; ++s) { - unsigned Start = s * NumLaneElts; - unsigned End = s * NumLaneElts + NumLaneElts/2; - for (unsigned i = Start; i != End; ++i) { + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { ShuffleMask.push_back(i); // Reads from dest/src1 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 } } } -// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit -// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 -// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of -// the first lane must be the same of the second. -void DecodeVPERMILPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumElts = VT.getVectorNumElements(); - - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumLaneElts = NumElts / NumLanes; - - for (unsigned l = 0; l != NumLanes; ++l) { - unsigned LaneStart = l*NumLaneElts; - for (unsigned i = 0; i != NumLaneElts; ++i) { - unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3 - : (Imm >> (i+LaneStart)) & 0x1; - ShuffleMask.push_back(Idx+LaneStart); - } - } -} - -void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, +void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned HalfSize = VT.getVectorNumElements()/2; unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; @@ -193,12 +185,4 @@ void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, ShuffleMask.push_back(i); } -void DecodeVPERM2F128Mask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only - // has information about the instruction and not the types. So for - // instruction comments purpose, assume the 256-bit vector is v4i64. 
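// The rewritten unpack decoders above fold the per-lane bookkeeping into
// the loop bounds: a 256-bit unpck behaves as two independent 128-bit
// unpcks. A standalone sketch of the low-unpack interleave pattern
// (decodeUnpackLo is a hypothetical helper, not the LLVM API):
#include <vector>

static void decodeUnpackLo(unsigned NumElts, unsigned NumLaneElts,
                           std::vector<unsigned> &Mask) {
  for (unsigned L = 0; L != NumElts; L += NumLaneElts)
    for (unsigned I = L, E = L + NumLaneElts / 2; I != E; ++I) {
      Mask.push_back(I);           // element from src1
      Mask.push_back(I + NumElts); // interleaved element from src2
    }
}
// e.g. decodeUnpackLo(4, 4, Mask) yields 0 4 1 5 (unpcklps on a v4f32).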
- return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask); -} - } // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 243728f..877c9bd 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -37,7 +37,7 @@ void DecodeMOVHLPSMask(unsigned NElts, void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodePSHUFMask(unsigned NElts, unsigned Imm, +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); void DecodePSHUFHWMask(unsigned Imm, @@ -46,30 +46,24 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); +/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates +/// the type of the vector allowing it to handle different datatypes and vector +/// widths. void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd -/// etc. VT indicates the type of the vector allowing it to handle different -/// datatypes and vector widths. +/// and punpckh*. VT indicates the type of the vector allowing it to handle +/// different datatypes and vector widths. void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. VT indicates the type of the vector allowing it to handle different -/// datatypes and vector widths. +/// and punpckl*. VT indicates the type of the vector allowing it to handle +/// different datatypes and vector widths. void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit -// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128 -// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of -// the first lane must be the same of the second. -void DecodeVPERMILPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeVPERM2F128Mask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); -void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, +void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8229ca5..b6591d4 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -1,4 +1,4 @@ -//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===// +//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -55,7 +55,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", - [FeatureSSE41, FeaturePOPCNT]>; + [FeatureSSE41]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! 
instructions", [FeatureMMX]>; @@ -78,20 +78,23 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "Fast unaligned memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", - [FeaturePOPCNT]>; + [FeatureSSE3]>; -def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", - "Enable AVX instructions">; -def FeatureAVX2 : SubtargetFeature<"avx2", "HasAVX2", "true", +def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", + "Enable AVX instructions", + [FeatureSSE42]>; +def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", "Enable carry-less multiplication instructions">; def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", - "Enable three-operand fused multiple-add">; + "Enable three-operand fused multiple-add", + [FeatureAVX]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", - "Enable four-operand fused multiple-add">; -def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", + "Enable four-operand fused multiple-add", + [FeatureAVX]>; +def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions">; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", @@ -112,13 +115,23 @@ def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", "Support BMI instructions">; def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; +def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", + "Use LEA for adjusting the stack pointer">; //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// +include "X86Schedule.td" + +def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", + "Intel Atom processors">; + class Proc<string Name, list<SubtargetFeature> Features> - : Processor<Name, NoItineraries, Features>; + : Processor<Name, GenericItineraries, Features>; + +class AtomProc<string Name, list<SubtargetFeature> Features> + : Processor<Name, AtomItineraries, Features>; def : Proc<"generic", []>; def : Proc<"i386", []>; @@ -143,35 +156,38 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE, - FeatureSlowBTMem]>; +def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B, + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>; + FeatureSlowBTMem, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES]>; def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem]>; + FeatureSlowBTMem, FeatureFastUAMem, + FeaturePOPCNT]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, FeatureAES, - FeatureCLMUL]>; + FeatureSlowBTMem, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeatureCLMUL]>; // Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. 
// FIXME: Disabling AVX for now since it's not ready. -def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, +def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeatureCLMUL]>; // Ivy Bridge -def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, +def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureAES, FeatureCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; // Haswell -// FIXME: Disabling AVX/AVX2 for now since it's not ready. -def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, - FeatureCLMUL, FeatureRDRAND, FeatureF16C, - FeatureFSGSBase, FeatureFMA3, FeatureMOVBE, - FeatureLZCNT, FeatureBMI, FeatureBMI2]>; +// FIXME: Disabling AVX/AVX2/FMA3 for now since it's not ready. +def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, + FeatureAES, FeatureCLMUL, FeatureRDRAND, + FeatureF16C, FeatureFSGSBase, + FeatureMOVBE, FeatureLZCNT, FeatureBMI, + FeatureBMI2]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -197,15 +213,20 @@ def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeatureSlowBTMem]>; -// FIXME: Disabling AVX for now since it's not ready. + FeaturePOPCNT, FeatureSlowBTMem]>; +// Bobcat +def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureLZCNT, FeaturePOPCNT]>; +// FIXME: Disabling AVX/FMA4 for now since it's not ready. +// Bulldozer def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureAES, FeatureCLMUL, FeatureFMA4, - FeatureXOP, FeatureLZCNT]>; + FeatureAES, FeatureCLMUL, + FeatureXOP, FeatureLZCNT, FeaturePOPCNT]>; +// Enhanced Bulldozer def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureAES, FeatureCLMUL, FeatureFMA4, - FeatureXOP, FeatureF16C, FeatureLZCNT, - FeatureBMI]>; + FeatureAES, FeatureCLMUL, + FeatureXOP, FeatureF16C, FeatureLZCNT, + FeaturePOPCNT, FeatureBMI]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; @@ -237,9 +258,11 @@ include "X86CallingConv.td" // Assembly Parser //===----------------------------------------------------------------------===// -// Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { - string AsmParserClassName = "ATTAsmParser"; + string AsmParserClassName = "AsmParser"; +} + +def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; // Discard comments in assembly strings. @@ -249,6 +272,16 @@ def ATTAsmParser : AsmParser { string RegisterPrefix = "%"; } +def IntelAsmParserVariant : AsmParserVariant { + int Variant = 1; + + // Discard comments in assembly strings. + string CommentDelimiter = ";"; + + // Recognize hard coded registers. + string RegisterPrefix = ""; +} + //===----------------------------------------------------------------------===// // Assembly Printers //===----------------------------------------------------------------------===// @@ -269,8 +302,7 @@ def IntelAsmWriter : AsmWriter { def X86 : Target { // Information about the instructions... 
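// Registering a second AsmParserVariant above is what lets one target
// accept two syntaxes: variant 0 stays AT&T ("%"-prefixed registers, "#"
// comments) and variant 1 is Intel (bare register names, ";" comments),
// matching the printer split in createX86MCInstPrinter earlier in this
// patch. A hedged usage sketch (signature as extended by this change):
// MCInstPrinter *IP =
//     TheTarget->createMCInstPrinter(/*SyntaxVariant=*/1, MAI, MRI, STI);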
let InstructionSet = X86InstrInfo; - let AssemblyParsers = [ATTAsmParser]; - + let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 4c3ff02..268cbf4 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -199,6 +199,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP_PIC_BASE: O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); break; + case X86II::MO_SECREL: O << "@SECREL"; break; } } @@ -266,14 +267,38 @@ void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op, unsigned char value = MI->getOperand(Op).getImm(); assert(value <= 7 && "Invalid ssecc argument!"); switch (value) { - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; } } @@ -575,7 +600,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() && - MMI->callsExternalVAFunctionWithFloatingPointArguments()) { + MMI->usesVAFloatArgument()) { StringRef SymbolName = Subtarget->is64Bit() ? 
"_fltused" : "__fltused"; MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName); OutStreamer.EmitSymbolAttribute(S, MCSA_Global); diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index 4326814..e01ff41 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===// +//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 98ab2a6..63c08f1 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===// +//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index aab2a05..d148989 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -1,10 +1,10 @@ -//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===// -// +//===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This describes the calling conventions for the X86-32 and X86-64 @@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[ // weirdly; this is really the sse-regparm calling convention) in which // case they use XMM0, otherwise it is the same as the common X86 calling // conv. - CCIfInReg<CCIfSubtarget<"hasXMMInt()", + CCIfInReg<CCIfSubtarget<"hasSSE2()", CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>, CCDelegateTo<RetCC_X86Common> @@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[ // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. - CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, - CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, // For integers, ECX can be used as an extra return register CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, @@ -150,12 +150,12 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX vector arguments are passed in XMM registers on Darwin. CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", - CCIfSubtarget<"hasXMMInt()", + CCIfSubtarget<"hasSSE2()", CCPromoteToType<v2i64>>>>, // The first 8 FP/Vector arguments are passed in XMM registers. 
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasXMM()", + CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, // The first 8 256-bit vector arguments are passed in YMM registers, unless @@ -198,6 +198,10 @@ def CC_X86_Win64_C : CallingConv<[ // 128 bit vectors are passed by pointer CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, + + // 256 bit vectors are passed by pointer + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>, + // The first 4 MMX vector arguments are passed in GPRs. CCIfType<[x86mmx], CCBitConvertToType<i64>>, @@ -238,7 +242,7 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasXMM()", + CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> ]>; @@ -256,7 +260,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 float or double arguments, if marked 'inreg' and if the call // is not a vararg call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64], - CCIfSubtarget<"hasXMMInt()", + CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, // The first 3 __m64 vector arguments are passed in mmx registers if the @@ -327,8 +331,8 @@ def CC_X86_32_ThisCall : CallingConv<[ // Promote i8/i16 arguments to i32. CCIfType<[i8, i16], CCPromoteToType<i32>>, - // The 'nest' parameter, if any, is passed in EAX. - CCIfNest<CCAssignToReg<[EAX]>>, + // Pass sret arguments indirectly through EAX + CCIfSRet<CCAssignToReg<[EAX]>>, // The first integer argument is passed in ECX CCIfType<[i32], CCAssignToReg<[ECX]>>, @@ -355,7 +359,7 @@ def CC_X86_32_FastCC : CallingConv<[ // The first 3 float or double arguments, if the call is not a vararg // call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfType<[f32,f64], - CCIfSubtarget<"hasXMMInt()", + CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, // Doubles get 8-byte slots that are 8-byte aligned. @@ -404,3 +408,18 @@ def CC_X86 : CallingConv<[ CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>, CCDelegateTo<CC_X86_32> ]>; + +//===----------------------------------------------------------------------===// +// Callee-saved Registers. 
+//===----------------------------------------------------------------------===// + +def CSR_Ghc : CalleeSavedRegs<(add)>; + +def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>; +def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>; + +def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>; +def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; + +def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, + (sequence "XMM%u", 6, 15))>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ed16e88..ee3de9a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===// +//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===// // // The LLVM Compiler Infrastructure // @@ -806,8 +806,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, } assert(MO.isImm() && "Unknown RawFrm operand!"); - if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 || - Opcode == X86::WINCALL64pcrel32) { + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls. intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 4a72d15..c1a49a7 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -60,7 +60,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { llvm_unreachable("unknown x86 machine relocation type"); } } - return 0; } long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, @@ -83,7 +82,6 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, llvm_unreachable("unknown x86 relocation type"); } } - return 0; } unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { @@ -107,7 +105,6 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { llvm_unreachable("unknown x86 relocation type"); } } - return 0; } bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { @@ -132,7 +129,6 @@ bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { llvm_unreachable("unknown x86 relocation type"); } } - return 0; } unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const { @@ -146,8 +142,6 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) return SymOffset - (RelOffset + 4); - else - assert(0 && "computeRelocation unknown for this relocation type"); - return 0; + llvm_unreachable("computeRelocation unknown for this relocation type"); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 1589439..f90764e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -60,8 +60,8 @@ public: explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? 
X86::RSP : X86::ESP; - X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX(); - X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX(); + X86ScalarSSEf64 = Subtarget->hasSSE2(); + X86ScalarSSEf32 = Subtarget->hasSSE1(); } virtual bool TargetSelectInstruction(const Instruction *I); @@ -837,8 +837,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { bool HasAVX = Subtarget->hasAVX(); - bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1(); - bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2(); + bool X86ScalarSSEf32 = Subtarget->hasSSE1(); + bool X86ScalarSSEf64 = Subtarget->hasSSE2(); switch (VT.getSimpleVT().SimpleTy) { default: return 0; @@ -1576,10 +1576,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { SmallVector<unsigned, 8> Args; SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgVals.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); + unsigned arg_size = CS.arg_size(); + Args.reserve(arg_size); + ArgVals.reserve(arg_size); + ArgVTs.reserve(arg_size); + ArgFlags.reserve(arg_size); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a mem intrinsic instead of a regular call, skip the @@ -1792,9 +1793,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc; - if (Subtarget->isTargetWin64()) - CallOpc = X86::WINCALL64r; - else if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) CallOpc = X86::CALL64r; else CallOpc = X86::CALL32r; @@ -1805,9 +1804,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Direct call. assert(GV && "Not a direct call"); unsigned CallOpc; - if (Subtarget->isTargetWin64()) - CallOpc = X86::WINCALL64pcrel32; - else if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) CallOpc = X86::CALL64pcrel32; else CallOpc = X86::CALLpcrel32; @@ -1852,10 +1849,15 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); + // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); unsigned NumBytesCallee = 0; - if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) + if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() && + CS.paramHasAttr(1, Attribute::StructRet)) NumBytesCallee = 4; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesCallee); @@ -2102,7 +2104,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { if (!X86SelectAddress(C, AM)) return 0; unsigned Opc = Subtarget->is64Bit() ? 
X86::LEA64r : X86::LEA32r; - TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg), AM); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index e3461c8..32de194 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1644,6 +1644,30 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { return; } + case X86::WIN_FTOL_32: + case X86::WIN_FTOL_64: { + // Push the operand into ST0. + MachineOperand &Op = MI->getOperand(0); + assert(Op.isUse() && Op.isReg() && + Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); + unsigned FPReg = getFPReg(Op); + if (Op.isKill()) + moveToTop(FPReg, I); + else + duplicateToTop(FPReg, FPReg, I); + + // Emit the call. This will pop the operand. + BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_ftol2") + .addReg(X86::ST0, RegState::ImplicitKill) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::EDX, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + --StackTop; + + break; + } + case X86::RET: case X86::RETI: // If RET has an FP register use operand, pass the first one in ST(0) and diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 6a40cc1..000e375 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====// +//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -79,6 +79,10 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { } } +static unsigned getLEArOpcode(unsigned is64Bit) { + return is64Bit ? X86::LEA64r : X86::LEA32r; +} + /// findDeadCallerSavedReg - Return a caller-saved register that isn't live /// when it reaches the "return" instruction. We can then pop a stack object /// to this register without worry about clobbering it. @@ -91,11 +95,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, if (!F || MF->getMMI().callsEHReturn()) return 0; - static const unsigned CallerSavedRegs32Bit[] = { + static const uint16_t CallerSavedRegs32Bit[] = { X86::EAX, X86::EDX, X86::ECX, 0 }; - static const unsigned CallerSavedRegs64Bit[] = { + static const uint16_t CallerSavedRegs64Bit[] = { X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, X86::R8, X86::R9, X86::R10, X86::R11, 0 }; @@ -113,7 +117,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { - SmallSet<unsigned, 8> Uses; + SmallSet<uint16_t, 8> Uses; for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MBBI->getOperand(i); if (!MO.isReg() || MO.isDef()) @@ -121,11 +125,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) + for (const uint16_t *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) Uses.insert(*AsI); } - const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; + const uint16_t *CS = Is64Bit ? 
CallerSavedRegs64Bit : CallerSavedRegs32Bit; for (; *CS; ++CS) if (!Uses.count(*CS)) return *CS; @@ -141,13 +145,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, static void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, int64_t NumBytes, - bool Is64Bit, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI) { + bool Is64Bit, bool UseLEA, + const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub ? - getSUBriOpcode(Is64Bit, Offset) : - getADDriOpcode(Is64Bit, Offset); + unsigned Opc; + if (UseLEA) + Opc = getLEArOpcode(Is64Bit); + else + Opc = isSub + ? getSUBriOpcode(Is64Bit, Offset) + : getADDriOpcode(Is64Bit, Offset); + uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -171,13 +180,21 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(ThisVal); + MachineInstr *MI = NULL; + + if (UseLEA) { + MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), + StackPtr, false, isSub ? -ThisVal : ThisVal); + } else { + MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr) + .addImm(ThisVal); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + } + if (isSub) MI->setFlag(MachineInstr::FrameSetup); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. + Offset -= ThisVal; } } @@ -191,7 +208,8 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || + Opc == X86::LEA32r || Opc == X86::LEA64_32r) && PI->getOperand(0).getReg() == StackPtr) { if (NumBytes) *NumBytes += PI->getOperand(2).getImm(); @@ -237,8 +255,8 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } /// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB instruction it is deleted argument and the -/// stack adjustment is returned as a positive value for ADD and a negative for +/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the +/// stack adjustment is returned as a positive value for ADD/LEA and a negative for /// SUB. static int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -254,7 +272,8 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, int Offset = 0; if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && + Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || + Opc == X86::LEA32r || Opc == X86::LEA64_32r) && PI->getOperand(0).getReg() == StackPtr){ Offset += PI->getOperand(2).getImm(); MBB.erase(PI); @@ -456,21 +475,21 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); // Encode the registers in the order they were saved, 3-bits per register. The - // registers are numbered from 1 to 6. + // registers are numbered from 1 to CU_NUM_SAVED_REGS. 
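The rewritten loop that follows walks all CU_NUM_SAVED_REGS slots from the top down, skips empty ones instead of stopping at the first, and packs each present register number into the next free 3-bit field; that is also why the assert widens from 0x7FFF to 0x3FFFF (six 3-bit fields, 18 bits). A self-contained C++ sketch of the packing, assuming CU_NUM_SAVED_REGS is 6 and that the compact-unwind register numbers have already been looked up, with 0 marking an empty slot:

#include <cstdint>

static const int CU_NUM_SAVED_REGS = 6;

// Pack up to six 3-bit compact-unwind register numbers, low fields first.
static uint32_t encodeSavedRegNums(const unsigned (&CURegNums)[CU_NUM_SAVED_REGS]) {
  uint32_t RegEnc = 0;
  int Idx = 0;
  for (int I = CU_NUM_SAVED_REGS - 1; I != -1; --I) {
    if (CURegNums[I] == 0)
      continue;                            // hole: skip, don't stop
    RegEnc |= (CURegNums[I] & 0x7) << (Idx++ * 3);
  }
  return RegEnc;                           // fits in 18 bits, hence 0x3FFFF
}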
uint32_t RegEnc = 0; - for (int I = 0; I != 6; --I) { + for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) { unsigned Reg = SavedRegs[I]; - if (Reg == 0) break; + if (Reg == 0) continue; + int CURegNum = getCompactUnwindRegNum(CURegs, Reg); - if (CURegNum == -1) - return ~0U; + if (CURegNum == -1) return ~0U; // Encode the 3-bit register number in order, skipping over 3-bits for each // register. - RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3); + RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); } - assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!"); + assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!"); return RegEnc; } @@ -626,6 +645,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { bool HasFP = hasFP(MF); bool Is64Bit = STI.is64Bit(); bool IsWin64 = STI.isTargetWin64(); + bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -879,7 +899,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: %rax preserves the offset and should be available. if (isSPUpdateNeeded) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, - TII, *RegInfo); + UseLEA, TII, *RegInfo); if (isEAXAlive) { // Restore EAX @@ -891,7 +911,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { } } else if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, - TII, *RegInfo); + UseLEA, TII, *RegInfo); if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. @@ -935,6 +955,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); bool Is64Bit = STI.is64Bit(); + bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -1015,7 +1036,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // We cannot use LEA here, because stack pointer was realigned. We need to // deallocate local frame back. if (CSSize) { - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, + *RegInfo); MBBI = prior(LastCSPop); } @@ -1036,7 +1058,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } else if (NumBytes) { // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo); } // We're returning from function via eh_return. @@ -1071,7 +1093,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Offset) { // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo); } // Jump to label or value in register. @@ -1115,7 +1137,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Check for possible merge with preceding ADD instruction. 
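In condensed form, what the emitSPUpdate change above does: with FeatureLeaForSP set (as it now is for the atom processor definition), stack-pointer adjustments are emitted as LEA, which performs the same pointer arithmetic without defining EFLAGS, so only the ADD/SUB path still needs its implicit EFLAGS def marked dead. An illustrative restatement (the get*Opcode helpers are the ones defined in the hunks above; chooseSPUpdateOpcode itself is hypothetical):

#include <cstdint>

// Declarations matching the static helpers earlier in this file.
unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm);
unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm);
unsigned getLEArOpcode(unsigned is64Bit);

// The opcode selection emitSPUpdate now performs.
unsigned chooseSPUpdateOpcode(bool Is64Bit, bool UseLEA,
                              bool IsSub, int64_t Offset) {
  if (UseLEA)
    return getLEArOpcode(Is64Bit);         // LEA form: no EFLAGS def
  return IsSub ? getSUBriOpcode(Is64Bit, Offset)
               : getADDriOpcode(Is64Bit, Offset);
}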
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); + emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo); } } @@ -1298,29 +1320,29 @@ HasNestArgument(const MachineFunction *MF) { return false; } + +/// GetScratchRegister - Get a register for performing work in the segmented +/// stack prologue. Depending on platform and the properties of the function +/// either one or two registers will be needed. Set primary to true for +/// the first register, false for the second. static unsigned -GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { - if (Is64Bit) { - return X86::R11; - } else { - CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); - bool IsNested = HasNestArgument(&MF); - - if (CallingConvention == CallingConv::X86_FastCall) { - if (IsNested) { - report_fatal_error("Segmented stacks does not support fastcall with " - "nested function."); - return -1; - } else { - return X86::EAX; - } - } else { - if (IsNested) - return X86::EDX; - else - return X86::ECX; - } +GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) { + if (Is64Bit) + return Primary ? X86::R11 : X86::R12; + + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + bool IsNested = HasNestArgument(&MF); + + if (CallingConvention == CallingConv::X86_FastCall || + CallingConvention == CallingConv::Fast) { + if (IsNested) + report_fatal_error("Segmented stacks does not support fastcall with " + "nested function."); + return Primary ? X86::EAX : X86::ECX; } + if (IsNested) + return Primary ? X86::EDX : X86::EAX; + return Primary ? X86::ECX : X86::EAX; } // The stack limit in the TCB is set to this many bytes above the actual stack @@ -1338,14 +1360,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { DebugLoc DL; const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); - unsigned ScratchReg = GetScratchRegister(Is64Bit, MF); + unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); - if (!ST->isTargetLinux()) - report_fatal_error("Segmented stacks supported only on linux."); + if (!ST->isTargetLinux() && !ST->isTargetDarwin() && + !ST->isTargetWin32() && !ST->isTargetFreeBSD()) + report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); @@ -1376,36 +1399,99 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // prologue. StackSize = MFI->getStackSize(); + // When the frame size is less than 256 we just compare the stack + // boundary directly to the value of the stack pointer, per gcc. + bool CompareStackPointer = StackSize < kSplitStackAvailable; + // Read the limit off the current stacklet off the stack_guard location. if (Is64Bit) { - TlsReg = X86::FS; - TlsOffset = 0x70; + if (ST->isTargetLinux()) { + TlsReg = X86::FS; + TlsOffset = 0x70; + } else if (ST->isTargetDarwin()) { + TlsReg = X86::GS; + TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. 
+ } else if (ST->isTargetFreeBSD()) { + TlsReg = X86::FS; + TlsOffset = 0x18; + } else { + report_fatal_error("Segmented stacks not supported on this platform."); + } - if (StackSize < kSplitStackAvailable) + if (CompareStackPointer) ScratchReg = X86::RSP; else BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + .addImm(1).addReg(0).addImm(-StackSize).addReg(0); BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) - .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { - TlsReg = X86::GS; - TlsOffset = 0x30; + if (ST->isTargetLinux()) { + TlsReg = X86::GS; + TlsOffset = 0x30; + } else if (ST->isTargetDarwin()) { + TlsReg = X86::GS; + TlsOffset = 0x48 + 90*4; + } else if (ST->isTargetWin32()) { + TlsReg = X86::FS; + TlsOffset = 0x14; // pvArbitrary, reserved for application use + } else if (ST->isTargetFreeBSD()) { + report_fatal_error("Segmented stacks not supported on FreeBSD i386."); + } else { + report_fatal_error("Segmented stacks not supported on this platform."); + } - if (StackSize < kSplitStackAvailable) + if (CompareStackPointer) ScratchReg = X86::ESP; else BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + .addImm(1).addReg(0).addImm(-StackSize).addReg(0); + + if (ST->isTargetLinux() || ST->isTargetWin32()) { + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + } else if (ST->isTargetDarwin()) { + + // TlsOffset doesn't fit into a mod r/m byte so we need an extra register + unsigned ScratchReg2; + bool SaveScratch2; + if (CompareStackPointer) { + // The primary scratch register is available for holding the TLS offset + ScratchReg2 = GetScratchRegister(Is64Bit, MF, true); + SaveScratch2 = false; + } else { + // Need to use a second register to hold the TLS offset + ScratchReg2 = GetScratchRegister(Is64Bit, MF, false); + + // Unfortunately, with fastcc the second scratch register may hold an arg + SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); + } - BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) - .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + // If Scratch2 is live-in then it needs to be saved + assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && + "Scratch register is live-in and not saved"); + + if (SaveScratch2) + BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) + .addReg(ScratchReg2, RegState::Kill); + + BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) + .addImm(TlsOffset); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) + .addReg(ScratchReg) + .addReg(ScratchReg2).addImm(1).addReg(0) + .addImm(0) + .addReg(TlsReg); + + if (SaveScratch2) + BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); + } } // This jump is taken if SP >= (Stacklet Limit + Stack Space required). // It jumps to normal execution of the function body. - BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB); + BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB); // On 32 bit we first push the arguments size and then the frame size. On 64 // bit, we pass the stack frame size in r10 and the argument size in r11. 
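The segmented-stack prologue assembled above reduces to: pick a probe value (SP itself when StackSize is under kSplitStackAvailable, otherwise SP - StackSize computed with the LEA), compare it against the stacklet limit held at the platform-specific TLS slot, and take the now-unsigned JA branch into the normal function body, falling through to the morestack path otherwise. In C-like terms (a conceptual sketch only; the real __morestack hand-off is register-based as the trailing comment notes, and morestack_stub is a hypothetical stand-in):

#include <cstddef>
#include <cstdint>

extern "C" void morestack_stub(size_t FrameSize, size_t ArgSize);

const size_t kSplitStackAvailable = 256;   // the "less than 256" fast path

void splitStackPrologue(uintptr_t SP, size_t StackSize,
                        uintptr_t TlsStackLimit, size_t ArgSize) {
  // Small frames compare SP directly, per gcc; large ones probe SP - size.
  uintptr_t Probe = StackSize < kSplitStackAvailable ? SP : SP - StackSize;
  if (Probe > TlsStackLimit)               // the JA_4 to prologueMBB
    return;                                // enough room on this stacklet
  morestack_stub(StackSize, ArgSize);      // allocate a new stacklet
}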
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 6f49064..d55a497 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -1,4 +1,4 @@ -//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===// +//===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3c35763..aa508b8 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -540,7 +540,7 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, const TargetInstrInfo *TII = TM.getInstrInfo(); if (Subtarget->isTargetCygMing()) { unsigned CallOp = - Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32; + Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; BuildMI(BB, DebugLoc(), TII->get(CallOp)).addExternalSymbol("__main"); } @@ -725,6 +725,213 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { return false; } +// Insert a node into the DAG at least before the Pos node's position. This +// will reposition the node as needed, and will assign it a node ID that is <= +// the Pos node's ID. Note that this does *not* preserve the uniqueness of node +// IDs! The selection DAG must no longer depend on their uniqueness when this +// is used. +static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { + if (N.getNode()->getNodeId() == -1 || + N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) { + DAG.RepositionNode(Pos.getNode(), N.getNode()); + N.getNode()->setNodeId(Pos.getNode()->getNodeId()); + } +} + +// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This +// allows us to convert the shift and and into an h-register extract and +// a scaled index. Returns false if the simplification is performed. +static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, + uint64_t Mask, + SDValue Shift, SDValue X, + X86ISelAddressMode &AM) { + if (Shift.getOpcode() != ISD::SRL || + !isa<ConstantSDNode>(Shift.getOperand(1)) || + !Shift.hasOneUse()) + return true; + + int ScaleLog = 8 - Shift.getConstantOperandVal(1); + if (ScaleLog <= 0 || ScaleLog >= 4 || + Mask != (0xffu << ScaleLog)) + return true; + + EVT VT = N.getValueType(); + DebugLoc DL = N.getDebugLoc(); + SDValue Eight = DAG.getConstant(8, MVT::i8); + SDValue NewMask = DAG.getConstant(0xff, VT); + SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight); + SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask); + SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8); + SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount); + + // Insert the new nodes into the topological ordering. We must do this in + // a valid topological ordering as nothing is going to go back and re-sort + // these nodes. We continually insert before 'N' in sequence as this is + // essentially a pre-flattened and pre-sorted sequence of nodes. There is no + // hierarchy left to express. + InsertDAGNode(DAG, N, Eight); + InsertDAGNode(DAG, N, Srl); + InsertDAGNode(DAG, N, NewMask); + InsertDAGNode(DAG, N, And); + InsertDAGNode(DAG, N, ShlCount); + InsertDAGNode(DAG, N, Shl); + DAG.ReplaceAllUsesWith(N, Shl); + AM.IndexReg = And; + AM.Scale = (1 << ScaleLog); + return false; +} + +// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this +// allows us to fold the shift into this addressing mode. 
Returns false if the +// transform succeeded. +static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, + uint64_t Mask, + SDValue Shift, SDValue X, + X86ISelAddressMode &AM) { + if (Shift.getOpcode() != ISD::SHL || + !isa<ConstantSDNode>(Shift.getOperand(1))) + return true; + + // Not likely to be profitable if either the AND or SHIFT node has more + // than one use (unless all uses are for address computation). Besides, + // isel mechanism requires their node ids to be reused. + if (!N.hasOneUse() || !Shift.hasOneUse()) + return true; + + // Verify that the shift amount is something we can fold. + unsigned ShiftAmt = Shift.getConstantOperandVal(1); + if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) + return true; + + EVT VT = N.getValueType(); + DebugLoc DL = N.getDebugLoc(); + SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT); + SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); + + // Insert the new nodes into the topological ordering. We must do this in + // a valid topological ordering as nothing is going to go back and re-sort + // these nodes. We continually insert before 'N' in sequence as this is + // essentially a pre-flattened and pre-sorted sequence of nodes. There is no + // hierarchy left to express. + InsertDAGNode(DAG, N, NewMask); + InsertDAGNode(DAG, N, NewAnd); + InsertDAGNode(DAG, N, NewShift); + DAG.ReplaceAllUsesWith(N, NewShift); + + AM.Scale = 1 << ShiftAmt; + AM.IndexReg = NewAnd; + return false; +} + +// Implement some heroics to detect shifts of masked values where the mask can +// be replaced by extending the shift and undoing that in the addressing mode +// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and +// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in +// the addressing mode. This results in code such as: +// +// int f(short *y, int *lookup_table) { +// ... +// return *y + lookup_table[*y >> 11]; +// } +// +// Turning into: +// movzwl (%rdi), %eax +// movl %eax, %ecx +// shrl $11, %ecx +// addl (%rsi,%rcx,4), %eax +// +// Instead of: +// movzwl (%rdi), %eax +// movl %eax, %ecx +// shrl $9, %ecx +// andl $124, %rcx +// addl (%rsi,%rcx), %eax +// +// Note that this function assumes the mask is provided as a mask *after* the +// value is shifted. The input chain may or may not match that, but computing +// such a mask is trivial. +static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, + uint64_t Mask, + SDValue Shift, SDValue X, + X86ISelAddressMode &AM) { + if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || + !isa<ConstantSDNode>(Shift.getOperand(1))) + return true; + + unsigned ShiftAmt = Shift.getConstantOperandVal(1); + unsigned MaskLZ = CountLeadingZeros_64(Mask); + unsigned MaskTZ = CountTrailingZeros_64(Mask); + + // The amount of shift we're trying to fit into the addressing mode is taken + // from the trailing zeros of the mask. + unsigned AMShiftAmt = MaskTZ; + + // There is nothing we can do here unless the mask is removing some bits. + // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits. + if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true; + + // We also need to ensure that mask is a continuous run of bits. + if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; + + // Scale the leading zero count down based on the actual size of the value. + // Also scale it down based on the size of the shift. 
+ MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt; + + // The final check is to ensure that any masked out high bits of X are + // already known to be zero. Otherwise, the mask has a semantic impact + // other than masking out a couple of low bits. Unfortunately, because of + // the mask, zero extensions will be removed from operands in some cases. + // This code works extra hard to look through extensions because we can + // replace them with zero extensions cheaply if necessary. + bool ReplacingAnyExtend = false; + if (X.getOpcode() == ISD::ANY_EXTEND) { + unsigned ExtendBits = + X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits(); + // Assume that we'll replace the any-extend with a zero-extend, and + // narrow the search to the extended value. + X = X.getOperand(0); + MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; + ReplacingAnyExtend = true; + } + APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), + MaskLZ); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne); + if (MaskedHighBits != KnownZero) return true; + + // We've identified a pattern that can be transformed into a single shift + // and an addressing mode. Make it so. + EVT VT = N.getValueType(); + if (ReplacingAnyExtend) { + assert(X.getValueType() != VT); + // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. + SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X); + InsertDAGNode(DAG, N, NewX); + X = NewX; + } + DebugLoc DL = N.getDebugLoc(); + SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8); + SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); + SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); + + // Insert the new nodes into the topological ordering. We must do this in + // a valid topological ordering as nothing is going to go back and re-sort + // these nodes. We continually insert before 'N' in sequence as this is + // essentially a pre-flattened and pre-sorted sequence of nodes. There is no + // hierarchy left to express. + InsertDAGNode(DAG, N, NewSRLAmt); + InsertDAGNode(DAG, N, NewSRL); + InsertDAGNode(DAG, N, NewSHLAmt); + InsertDAGNode(DAG, N, NewSHL); + DAG.ReplaceAllUsesWith(N, NewSHL); + + AM.Scale = 1 << AMShiftAmt; + AM.IndexReg = NewSRL; + return false; +} + bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { DebugLoc dl = N.getDebugLoc(); @@ -814,6 +1021,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; } + case ISD::SRL: { + // Scale must not be used already. + if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + + SDValue And = N.getOperand(0); + if (And.getOpcode() != ISD::AND) break; + SDValue X = And.getOperand(0); + + // We only handle up to 64-bit values here as those are what matter for + // addressing mode optimizations. + if (X.getValueSizeInBits() > 64) break; + + // The mask used for the transform is expected to be post-shift, but we + // found the shift first so just apply the shift to the mask before passing + // it down. + if (!isa<ConstantSDNode>(N.getOperand(1)) || + !isa<ConstantSDNode>(And.getOperand(1))) + break; + uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1); + + // Try to fold the mask and shift into the scale, and return false if we + // succeed. 
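Before the call that follows, the identity FoldMaskAndShiftToScale relies on, instantiated with the lookup_table example from its comment: the DAG canonicalizes lookup_table[*y >> 11] into (and (srl y, 9), 124), and the fold rewrites that as (shl (srl y, 11), 2), whose shl is then absorbed as an addressing-mode scale of 4. A standalone exhaustive check of the identity over the zero-extended 16-bit range (nothing LLVM-specific):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    // Canonicalized mask-of-shift form vs. shift folded into the scale.
    assert(((X >> 9) & 124) == ((X >> 11) << 2));
  }
  return 0;
}

The function's known-zero check on the masked-out high bits is what makes the same rewrite safe for wider values: bits above the mask must already be zero, exactly as they are here by zero-extension.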
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM)) + return false; + break; + } + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: // A mul_lohi where we need the low part can be folded as a plain multiply. @@ -917,16 +1151,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.Scale = 1; // Insert the new nodes into the topological ordering. - if (Zero.getNode()->getNodeId() == -1 || - Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), Zero.getNode()); - Zero.getNode()->setNodeId(N.getNode()->getNodeId()); - } - if (Neg.getNode()->getNodeId() == -1 || - Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), Neg.getNode()); - Neg.getNode()->setNodeId(N.getNode()->getNodeId()); - } + InsertDAGNode(*CurDAG, N, Zero); + InsertDAGNode(*CurDAG, N, Neg); return false; } @@ -981,121 +1207,34 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. - SDValue Shift = N.getOperand(0); - if (Shift.getNumOperands() != 2) break; - // Scale must not be used already. if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + SDValue Shift = N.getOperand(0); + if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break; SDValue X = Shift.getOperand(0); - ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); - ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); - if (!C1 || !C2) break; - - // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This - // allows us to convert the shift and and into an h-register extract and - // a scaled index. - if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { - unsigned ScaleLog = 8 - C1->getZExtValue(); - if (ScaleLog > 0 && ScaleLog < 4 && - C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { - SDValue Eight = CurDAG->getConstant(8, MVT::i8); - SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); - SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), - X, Eight); - SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), - Srl, Mask); - SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); - SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), - And, ShlCount); - - // Insert the new nodes into the topological ordering. 
- if (Eight.getNode()->getNodeId() == -1 || - Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { - CurDAG->RepositionNode(X.getNode(), Eight.getNode()); - Eight.getNode()->setNodeId(X.getNode()->getNodeId()); - } - if (Mask.getNode()->getNodeId() == -1 || - Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { - CurDAG->RepositionNode(X.getNode(), Mask.getNode()); - Mask.getNode()->setNodeId(X.getNode()->getNodeId()); - } - if (Srl.getNode()->getNodeId() == -1 || - Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { - CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); - Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); - } - if (And.getNode()->getNodeId() == -1 || - And.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), And.getNode()); - And.getNode()->setNodeId(N.getNode()->getNodeId()); - } - if (ShlCount.getNode()->getNodeId() == -1 || - ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { - CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); - ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); - } - if (Shl.getNode()->getNodeId() == -1 || - Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), Shl.getNode()); - Shl.getNode()->setNodeId(N.getNode()->getNodeId()); - } - CurDAG->ReplaceAllUsesWith(N, Shl); - AM.IndexReg = And; - AM.Scale = (1 << ScaleLog); - return false; - } - } - // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this - // allows us to fold the shift into this addressing mode. - if (Shift.getOpcode() != ISD::SHL) break; + // We only handle up to 64-bit values here as those are what matter for + // addressing mode optimizations. + if (X.getValueSizeInBits() > 64) break; - // Not likely to be profitable if either the AND or SHIFT node has more - // than one use (unless all uses are for address computation). Besides, - // isel mechanism requires their node ids to be reused. - if (!N.hasOneUse() || !Shift.hasOneUse()) + if (!isa<ConstantSDNode>(N.getOperand(1))) break; - - // Verify that the shift amount is something we can fold. - unsigned ShiftCst = C1->getZExtValue(); - if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) - break; - - // Get the new AND mask, this folds to a constant. - SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), - SDValue(C2, 0), SDValue(C1, 0)); - SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, - NewANDMask); - SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), - NewAND, SDValue(C1, 0)); + uint64_t Mask = N.getConstantOperandVal(1); - // Insert the new nodes into the topological ordering. 
- if (C1->getNodeId() > X.getNode()->getNodeId()) { - CurDAG->RepositionNode(X.getNode(), C1); - C1->setNodeId(X.getNode()->getNodeId()); - } - if (NewANDMask.getNode()->getNodeId() == -1 || - NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { - CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); - NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); - } - if (NewAND.getNode()->getNodeId() == -1 || - NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { - CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); - NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); - } - if (NewSHIFT.getNode()->getNodeId() == -1 || - NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); - NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); - } + // Try to fold the mask and shift into an extract and scale. + if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) + return false; - CurDAG->ReplaceAllUsesWith(N, NewSHIFT); - - AM.Scale = 1 << ShiftCst; - AM.IndexReg = NewAND; - return false; + // Try to fold the mask and shift directly into the scale. + if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) + return false; + + // Try to swap the mask and shift to place shifts which can be done as + // a scale on the outside of the mask. + if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM)) + return false; + break; } } @@ -1829,7 +1968,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), getI8Imm(ShlVal)); - break; } case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); @@ -2131,7 +2269,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // On x86-32, only the ABCD registers have 8-bit subregisters. if (!Subtarget->is64Bit()) { - TargetRegisterClass *TRC = 0; + const TargetRegisterClass *TRC; switch (N0.getValueType().getSimpleVT().SimpleTy) { case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; @@ -2160,7 +2298,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Put the value in an ABCD register. 
- TargetRegisterClass *TRC = 0; + const TargetRegisterClass *TRC; switch (N0.getValueType().getSimpleVT().SimpleTy) { case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; @@ -2240,6 +2378,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Chain->getOpcode() != ISD::LOAD || StoredVal->getOpcode() != X86ISD::DEC || StoredVal.getResNo() != 0 || + !StoredVal.getNode()->hasNUsesOfValue(1, 0) || + !Chain.getNode()->hasNUsesOfValue(1, 0) || StoredVal->getOperand(0).getNode() != Chain.getNode()) break; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 03727a2..cae9aad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "x86-isel" +#include "X86ISelLowering.h" #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "Utils/X86ShuffleDecode.h" @@ -39,20 +39,17 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/VariadicFunction.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include <bitset> using namespace llvm; -using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -60,17 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); -static SDValue Insert128BitVector(SDValue Result, - SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - -static SDValue Extract128BitVector(SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -168,8 +154,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<X86Subtarget>(); - X86ScalarSSEf64 = Subtarget->hasXMMInt(); - X86ScalarSSEf32 = Subtarget->hasXMM(); + X86ScalarSSEf64 = Subtarget->hasSSE2(); + X86ScalarSSEf32 = Subtarget->hasSSE1(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; RegInfo = TM.getRegisterInfo(); @@ -185,8 +171,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. + // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling. 
if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); + else if (Subtarget->isAtom()) + setSchedulingPreference(Sched::Hybrid); else setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -198,15 +187,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setLibcallName(RTLIB::SREM_I64, "_allrem"); setLibcallName(RTLIB::UREM_I64, "_aullrem"); setLibcallName(RTLIB::MUL_I64, "_allmul"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); + + // The _ftol2 runtime function has an unusual calling conv, which + // is modeled by a special pseudo-instruction. + setLibcallName(RTLIB::FPTOUINT_F64_I64, 0); + setLibcallName(RTLIB::FPTOUINT_F32_I64, 0); + setLibcallName(RTLIB::FPTOUINT_F64_I32, 0); + setLibcallName(RTLIB::FPTOUINT_F32_I32, 0); } if (Subtarget->isTargetDarwin()) { @@ -255,7 +247,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); } else if (!TM.Options.UseSoftFloat) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. @@ -326,6 +318,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); } + if (isTargetFTOL()) { + // Use the _ftol2 runtime function, which has a pseudo-instruction + // to handle its weird calling convention. + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + } + // TODO: when we have SSE, these could be more efficient, by using movd/movq. if (!X86ScalarSSEf64) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); @@ -378,32 +376,46 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand); + // Promote the i8 variants and force them on up to i32 which has a shorter + // encoding. 
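The Promote actions that follow are sound because the widened operand can carry a guard bit: OR in bit 8 before the 32-bit count, and a zero byte still yields 8, while the _ZERO_UNDEF variants need no guard at all since a zero input is undefined. A conceptual C++ sketch of that argument for cttz (not the legalizer's actual expansion):

#include <cassert>
#include <cstdint>

// i8 cttz via a 32-bit count: the guard bit caps the result at 8 and
// keeps the builtin away from an all-zero input.
unsigned cttz8_via_i32(uint8_t X) {
  uint32_t Wide = uint32_t(X) | 0x100u;
  return unsigned(__builtin_ctz(Wide));    // GCC/Clang builtin
}

int main() {
  assert(cttz8_via_i32(0x40) == 6);
  assert(cttz8_via_i32(0) == 8);           // defined even for zero
  return 0;
}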
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote); + AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote); + AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32); if (Subtarget->hasBMI()) { - setOperationAction(ISD::CTTZ , MVT::i8 , Promote); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); } else { - setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) setOperationAction(ISD::CTTZ , MVT::i64 , Custom); } - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand); if (Subtarget->hasLZCNT()) { + // When promoting the i8 variants, force them to i32 for a shorter + // encoding. setOperationAction(ISD::CTLZ , MVT::i8 , Promote); + AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote); + AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); } else { setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); - if (Subtarget->is64Bit()) + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom); + if (Subtarget->is64Bit()) { setOperationAction(ISD::CTLZ , MVT::i64 , Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom); + } } if (Subtarget->hasPOPCNT()) { @@ -466,7 +478,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - if (Subtarget->hasXMM()) + if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); @@ -800,7 +812,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); - if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -817,7 +829,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } - if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM @@ -923,7 +935,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41orAVX()) { + if (Subtarget->hasSSE41()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); 
setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -959,14 +971,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); // FIXME: these should be Legal but that's only for the case where - // the index is constant. For now custom expand to deal with that + // the index is constant. For now custom expand to deal with that. if (Subtarget->is64Bit()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } } - if (Subtarget->hasXMMInt()) { + if (Subtarget->hasSSE2()) { setOperationAction(ISD::SRL, MVT::v8i16, Custom); setOperationAction(ISD::SRL, MVT::v16i8, Custom); @@ -995,7 +1007,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42orAVX()) + if (Subtarget->hasSSE42()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { @@ -1157,7 +1169,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // of this type with custom code. for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) { - setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, + Custom); } // We want to custom lower some of our intrinsics. @@ -1195,7 +1208,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); - setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SHL); @@ -1210,6 +1222,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SINT_TO_FP); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -1279,7 +1293,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { } unsigned Align = 4; - if (Subtarget->hasXMM()) + if (Subtarget->hasSSE1()) getMaxByValAlign(Ty, Align); return Align; } @@ -1313,17 +1327,20 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16))) && Subtarget->getStackAlignment() >= 16) { - if (Subtarget->hasAVX() && - Subtarget->getStackAlignment() >= 32) - return MVT::v8f32; - if (Subtarget->hasXMMInt()) + if (Subtarget->getStackAlignment() >= 32) { + if (Subtarget->hasAVX2()) + return MVT::v8i32; + if (Subtarget->hasAVX()) + return MVT::v8f32; + } + if (Subtarget->hasSSE2()) return MVT::v4i32; - if (Subtarget->hasXMM()) + if (Subtarget->hasSSE1()) return MVT::v4f32; } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasXMMInt()) { + Subtarget->hasSSE2()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. return MVT::f64; @@ -1488,14 +1505,14 @@ X86TargetLowering::LowerReturn(SDValue Chain, // or SSE or MMX vectors.
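Stepping back to the getOptimalMemOpType change above: the type choice now prefers the widest vector the stack alignment permits, and prefers an integer type whenever the ISA has real integer operations at that width. A standalone paraphrase of that ladder (illustrative C++; the enum and function names are hypothetical, not the LLVM API):

enum class MemVT { v8i32, v8f32, v4i32, v4f32, i32 };

// Priority order mirrored from the diff: 256-bit integer (AVX2), 256-bit
// float (AVX), 128-bit integer (SSE2), 128-bit float (SSE1), else scalar.
MemVT PickMemcpyVT(bool HasAVX2, bool HasAVX, bool HasSSE2, bool HasSSE1,
                   unsigned StackAlign) {
  if (StackAlign >= 32 && HasAVX2) return MemVT::v8i32;
  if (StackAlign >= 32 && HasAVX)  return MemVT::v8f32;
  if (HasSSE2) return MemVT::v4i32;
  if (HasSSE1) return MemVT::v4f32;
  return MemVT::i32;
}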
if ((ValVT == MVT::f32 || ValVT == MVT::f64 || VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && - (Subtarget->is64Bit() && !Subtarget->hasXMM())) { + (Subtarget->is64Bit() && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } // Likewise we can't return F64 values with SSE1 only. gcc does so, but // llvm-gcc has never done it right and no one has noticed, so this // should be OK for now. if (ValVT == MVT::f64 && - (Subtarget->is64Bit() && !Subtarget->hasXMMInt())) + (Subtarget->is64Bit() && !Subtarget->hasSSE2())) report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to @@ -1521,7 +1538,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy); // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. - if (!Subtarget->hasXMMInt()) + if (!Subtarget->hasSSE2()) ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } @@ -1568,8 +1585,12 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { return false; SDNode *Copy = *N->use_begin(); - if (Copy->getOpcode() != ISD::CopyToReg && - Copy->getOpcode() != ISD::FP_EXTEND) + if (Copy->getOpcode() == ISD::CopyToReg) { + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; + } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; bool HasRet = false; @@ -1621,7 +1642,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && - ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) { + ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -1711,7 +1732,7 @@ static bool IsTailCallConvention(CallingConv::ID CC) { } bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!CI->isTailCall()) + if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) return false; CallSite CS(CI); @@ -1790,6 +1811,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); + bool IsWindows = Subtarget->isTargetWindows(); bool IsWin64 = Subtarget->isTargetWin64(); assert(!(isVarArg && IsTailCallConvention(CallConv)) && @@ -1820,7 +1842,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC; if (RegVT == MVT::i32) RC = X86::GR32RegisterClass; else if (Is64Bit && RegVT == MVT::i64) @@ -1928,19 +1950,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, TotalNumIntRegs = 6; TotalNumXMMRegs = 8; GPR64ArgRegs = GPR64ArgRegs64Bit; - NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs); + NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, + TotalNumXMMRegs); } unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); - assert(!(NumXMMRegs && !Subtarget->hasXMM()) && + assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) && "SSE register cannot be 
used when SSE is disabled!"); if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps || - !Subtarget->hasXMM()) + !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; @@ -1957,8 +1980,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); } else { // For X86-64, if there are vararg parameters that are passed via - // registers, then we must store them to their spots on the stack so they - // may be loaded by deferencing the result of va_next. + // registers, then we must store them to their spots on the stack so + // they may be loaded by dereferencing the result of va_next. FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); FuncInfo->setRegSaveFrameIndex( @@ -2024,7 +2047,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. - if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins)) + if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && + ArgsAreStructReturn(Ins)) FuncInfo->setBytesToPopOnReturn(4); } @@ -2099,7 +2123,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, SDValue X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2108,9 +2132,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); + bool IsWindows = Subtarget->isTargetWindows(); bool IsStructRet = CallIsStructReturn(Outs); bool IsSibcall = false; + if (MF.getTarget().Options.DisableTailCalls) + isTailCall = false; + if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, @@ -2303,7 +2331,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); - assert((Subtarget->hasXMM() || !NumXMMRegs) + assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); Chain = DAG.getCopyToReg(Chain, dl, X86::AL, @@ -2488,6 +2516,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Is64Bit && isVarArg && !IsWin64) Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); + // Add a register mask operand representing the call-preserved registers.
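On the register-mask operand just introduced above: the mask is a bit-per-register table in which a set bit marks a register whose value survives the call, replacing the older scheme of listing every clobbered register as an implicit operand. A sketch of how such a mask is decoded (plain C++; the helper name is hypothetical, though the bit layout matches how these masks are consumed):

#include <cstdint>

// Bit (Reg % 32) of Mask[Reg / 32] is set when physical register Reg is
// preserved across the call; a clear bit means the register is clobbered.
inline bool IsPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1u;
}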
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2510,10 +2544,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything - else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) + else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && + IsStructRet) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. + // For MSVC Win32 targets, the caller pops the hidden struct pointer. NumBytesForCalleeToPush = 4; else NumBytesForCalleeToPush = 0; // Callee pops nothing. @@ -2709,9 +2745,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack. - // Therefore if it's not used by the call it is not safe to optimize this into - // a sibcall. + // If the call result is in ST0 / ST1, it needs to be popped off the x87 + // stack. Therefore, if it's not used by the call it is not safe to optimize + // this into a sibcall. bool Unused = false; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (!Ins[i].Used) { @@ -2853,9 +2889,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: - case X86ISD::SHUFPD: + case X86ISD::SHUFP: case X86ISD::PALIGN: - case X86ISD::SHUFPS: case X86ISD::MOVLHPS: case X86ISD::MOVLHPD: case X86ISD::MOVHLPS: @@ -2872,7 +2907,6 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERM2X128: return true; } - return false; } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -2884,8 +2918,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVDDUP: return DAG.getNode(Opc, dl, VT, V1); } - - return SDValue(); } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -2898,8 +2930,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::VPERMILP: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } - - return SDValue(); } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -2907,13 +2937,11 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PALIGN: - case X86ISD::SHUFPD: - case X86ISD::SHUFPS: + case X86ISD::SHUFP: case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } - return SDValue(); } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -2931,7 +2959,6 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::UNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } - return SDValue(); } SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { @@ -3126,17 +3153,6 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrInRange - Return true if every element in Mask, begining -/// from 
position Pos and ending in Pos+Size, falls within the specified -/// range (L, L+Pos]. or is undef. -static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask, - int Pos, int Size, int Low, int Hi) { - for (int i = Pos, e = Pos+Size; i != e; ++i) - if (!isUndefOrInRange(Mask[i], Low, Hi)) - return false; - return true; -} - /// isUndefOrEqual - Val is either less than zero (undef) or equal to the /// specified value. static bool isUndefOrEqual(int Val, int CmpVal) { @@ -3148,7 +3164,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) { /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (L, L+Pos], or is undef. -static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask, +static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, int Pos, int Size, int Low) { for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) if (!isUndefOrEqual(Mask[i], Low)) return false; return true; @@ -3159,7 +3175,7 @@ static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask, /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. -static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) { +static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) { if (VT == MVT::v4f32 || VT == MVT::v4i32 ) return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); if (VT == MVT::v2f64 || VT == MVT::v2i64) @@ -3167,180 +3183,113 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) { return false; } -bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isPSHUFDMask(M, N->getValueType(0)); -} - /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFHW. -static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) { +static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) { if (VT != MVT::v8i16) return false; // Lower quadword copied in order or undef. - for (int i = 0; i != 4; ++i) - if (Mask[i] >= 0 && Mask[i] != i) - return false; + if (!isSequentialOrUndefInRange(Mask, 0, 4, 0)) + return false; // Upper quadword shuffled. - for (int i = 4; i != 8; ++i) + for (unsigned i = 4; i != 8; ++i) if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false; return true; } -bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isPSHUFHWMask(M, N->getValueType(0)); -} - /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) { +static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) { if (VT != MVT::v8i16) return false; // Upper quadword copied in order. - for (int i = 4; i != 8; ++i) - if (Mask[i] >= 0 && Mask[i] != i) - return false; + if (!isSequentialOrUndefInRange(Mask, 4, 4, 4)) + return false; // Lower quadword shuffled. - for (int i = 0; i != 4; ++i) + for (unsigned i = 0; i != 4; ++i) if (Mask[i] >= 4) return false; return true; } -bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isPSHUFLWMask(M, N->getValueType(0)); -} - /// isPALIGNRMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PALIGNR.
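For reference while reading the rewrite of isPALIGNRMask that follows: PALIGNR extracts a byte-shifted window from the concatenation of its two sources, and the AVX2 form repeats that independently in each 128-bit lane, which is why the new check walks lane by lane. The 128-bit case in intrinsics (illustrative; assumes an SSSE3-capable host):

#include <tmmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  alignas(16) uint8_t LoB[16], HiB[16];
  for (int I = 0; I != 16; ++I) { LoB[I] = uint8_t(I); HiB[I] = uint8_t(16 + I); }
  __m128i Lo = _mm_load_si128(reinterpret_cast<const __m128i *>(LoB));
  __m128i Hi = _mm_load_si128(reinterpret_cast<const __m128i *>(HiB));
  __m128i R = _mm_alignr_epi8(Hi, Lo, 5);   // bytes 5..20 of the pair Hi:Lo
  alignas(16) uint8_t Out[16];
  _mm_store_si128(reinterpret_cast<__m128i *>(Out), R);
  for (int I = 0; I != 16; ++I) printf("%d ", Out[I]);  // prints 5 6 ... 20
}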
-static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool hasSSSE3OrAVX) { - int i, e = VT.getVectorNumElements(); - if (VT.getSizeInBits() != 128) +static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT, + const X86Subtarget *Subtarget) { + if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) || + (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())) return false; - // Do not handle v2i64 / v2f64 shuffles with palignr. - if (e < 4 || !hasSSSE3OrAVX) + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; + + // Do not handle 64-bit element shuffles with palignr. + if (NumLaneElts == 2) return false; - for (i = 0; i != e; ++i) - if (Mask[i] >= 0) - break; + for (unsigned l = 0; l != NumElts; l+=NumLaneElts) { + unsigned i; + for (i = 0; i != NumLaneElts; ++i) { + if (Mask[i+l] >= 0) + break; + } - // All undef, not a palignr. - if (i == e) - return false; + // Lane is all undef, go to next lane + if (i == NumLaneElts) + continue; - // Make sure we're shifting in the right direction. - if (Mask[i] <= i) - return false; + int Start = Mask[i+l]; - int s = Mask[i] - i; + // Make sure it's in this lane in one of the sources + if (!isUndefOrInRange(Start, l, l+NumLaneElts) && + !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts)) + return false; - // Check the rest of the elements to see if they are consecutive. - for (++i; i != e; ++i) { - int m = Mask[i]; - if (m >= 0 && m != s+i) + // If not lane 0, then we must match lane 0 + if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l)) return false; - } - return true; -} -/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to 256-bit -/// VSHUFPSY. -static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool HasAVX, bool Commuted = false) { - int NumElems = VT.getVectorNumElements(); + // Correct second source to be contiguous with first source + if (Start >= (int)NumElts) + Start -= NumElts - NumLaneElts; - if (!HasAVX || VT.getSizeInBits() != 256) - return false; + // Make sure we're shifting in the right direction. + if (Start <= (int)(i+l)) + return false; - if (NumElems != 4 && NumElems != 8) - return false; + Start -= i; - // VSHUFPSY divides the resulting vector into 4 chunks. - // The sources are also splitted into 4 chunks, and each destination - // chunk must come from a different source chunk. - // - // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0 - // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9 - // - // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4, - // Y3..Y0, Y3..Y0, X3..X0, X3..X0 - // - // VSHUFPDY divides the resulting vector into 4 chunks. - // The sources are also splitted into 4 chunks, and each destination - // chunk must come from a different source chunk. - // - // SRC1 => X3 X2 X1 X0 - // SRC2 => Y3 Y2 Y1 Y0 - // - // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 - // - unsigned QuarterSize = NumElems/4; - unsigned HalfSize = QuarterSize*2; - for (unsigned l = 0; l != 2; ++l) { - unsigned LaneStart = l*HalfSize; - for (unsigned s = 0; s != 2; ++s) { - unsigned QuarterStart = s*QuarterSize; - unsigned Src = (Commuted) ?
(1-s) : s; - unsigned SrcStart = Src*NumElems + LaneStart; - for (unsigned i = 0; i != QuarterSize; ++i) { - int Idx = Mask[i+QuarterStart+LaneStart]; - if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize)) - return false; - // For VSHUFPSY, the mask of the second half must be the same as the first - // but with the appropriate offsets. This works in the same way as - // VPERMILPS works with masks. - if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0) - continue; - if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize)) - return false; - } - } - } + // Check the rest of the elements to see if they are consecutive. + for (++i; i != NumLaneElts; ++i) { + int Idx = Mask[i+l]; - return true; -} + // Make sure it's in this lane + if (!isUndefOrInRange(Idx, l, l+NumLaneElts) && + !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts)) + return false; -/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions. -static unsigned getShuffleVSHUFPYImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); + // If not lane 0, then we must match lane 0 + if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l)) + return false; - assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types"); - assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types"); + if (Idx >= (int)NumElts) + Idx -= NumElts - NumLaneElts; - int HalfSize = NumElems/2; - unsigned Mul = (NumElems == 8) ? 2 : 1; - unsigned Mask = 0; - for (int i = 0; i != NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt < 0) - continue; - Elt %= HalfSize; - unsigned Shamt = i; - // For VSHUFPSY, the mask of the first half must be equal to the second one. - if (NumElems == 8) Shamt %= HalfSize; - Mask |= Elt << (Shamt*Mul); + if (!isUndefOrEqual(Idx, Start+i)) + return false; + + } } - return Mask; + return true; } /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming @@ -3359,42 +3308,63 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, } /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to 128-bit +/// specifies a shuffle of elements that is suitable for input to 128/256-bit /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be /// reverse of what x86 shuffles want. -static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, +static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX, bool Commuted = false) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!HasAVX && VT.getSizeInBits() == 256) return false; - if (NumElems != 2 && NumElems != 4) + unsigned NumElems = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElems = NumElems/NumLanes; + + if (NumLaneElems != 2 && NumLaneElems != 4) return false; - unsigned Half = NumElems / 2; - unsigned SrcStart = Commuted ? NumElems : 0; - for (unsigned i = 0; i != Half; ++i) - if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) - return false; - SrcStart = Commuted ? 0 : NumElems; - for (unsigned i = Half; i != NumElems; ++i) - if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) - return false; + // VSHUFPSY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination + // chunk must come from a different source chunk. + // + // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0 + // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + // + // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4, + // Y3..Y0, Y3..Y0, X3..X0, X3..X0 + // + // VSHUFPDY divides the resulting vector into 4 chunks. + // The sources are also split into 4 chunks, and each destination + // chunk must come from a different source chunk. + // + // SRC1 => X3 X2 X1 X0 + // SRC2 => Y3 Y2 Y1 Y0 + // + // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 + // + unsigned HalfLaneElems = NumLaneElems/2; + for (unsigned l = 0; l != NumElems; l += NumLaneElems) { + for (unsigned i = 0; i != NumLaneElems; ++i) { + int Idx = Mask[i+l]; + unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0); + if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems)) + return false; + // For VSHUFPSY, the mask of the second half must be the same as the + // first but with the appropriate offsets. This works in the same way as + // VPERMILPS works with masks. + if (NumElems != 8 || l == 0 || Mask[i] < 0) + continue; + if (!isUndefOrEqual(Idx, Mask[i]+l)) + return false; + } + } return true; } -bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isSHUFPMask(M, N->getValueType(0)); -} - /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. -bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); +static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { unsigned NumElems = VT.getVectorNumElements(); if (VT.getSizeInBits() != 128) @@ -3404,17 +3374,16 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { return false; // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3 - return isUndefOrEqual(N->getMaskElt(0), 6) && - isUndefOrEqual(N->getMaskElt(1), 7) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); + return isUndefOrEqual(Mask[0], 6) && + isUndefOrEqual(Mask[1], 7) && + isUndefOrEqual(Mask[2], 2) && + isUndefOrEqual(Mask[3], 3); } /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); +static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { unsigned NumElems = VT.getVectorNumElements(); if (VT.getSizeInBits() != 128) @@ -3423,26 +3392,29 @@ bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { if (NumElems != 4) return false; - return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); + return isUndefOrEqual(Mask[0], 2) && + isUndefOrEqual(Mask[1], 3) && + isUndefOrEqual(Mask[2], 2) && + isUndefOrEqual(Mask[3], 3); } /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
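The unified isSHUFPMask above enforces the lane rule the VSHUFPSY comment describes: a single immediate is applied to the low and high 128-bit lanes independently, so elements never cross lanes. In intrinsics (illustrative; assumes AVX):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m256 A = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);          // X0..X7
  __m256 B = _mm256_setr_ps(10, 11, 12, 13, 14, 15, 16, 17);  // Y0..Y7
  // Immediate 0x1B selects, per lane: A[3], A[2], B[1], B[0].
  float Out[8];
  _mm256_storeu_ps(Out, _mm256_shuffle_ps(A, B, 0x1B));
  for (int I = 0; I != 8; ++I) printf("%g ", Out[I]);
  // prints: 3 2 11 10 7 6 15 14 -- the same pattern in both lanes
}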
-bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { + if (VT.getSizeInBits() != 128) + return false; + + unsigned NumElems = VT.getVectorNumElements(); if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems)) + for (unsigned i = 0; i != NumElems/2; ++i) + if (!isUndefOrEqual(Mask[i], i + NumElems)) return false; - for (unsigned i = NumElems/2; i < NumElems; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), i)) + for (unsigned i = NumElems/2; i != NumElems; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; return true; @@ -3450,19 +3422,19 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLHPS. -bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); +static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { + unsigned NumElems = VT.getVectorNumElements(); if ((NumElems != 2 && NumElems != 4) - || N->getValueType(0).getSizeInBits() > 128) + || VT.getSizeInBits() > 128) return false; - for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), i)) + for (unsigned i = 0; i != NumElems/2; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i < NumElems/2; ++i) - if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems)) + for (unsigned i = 0; i != NumElems/2; ++i) + if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) return false; return true; @@ -3470,9 +3442,9 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. -static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, +static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); @@ -3486,11 +3458,9 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, unsigned NumLanes = VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts/NumLanes; - unsigned Start = 0; - unsigned End = NumLaneElts; - for (unsigned s = 0; s < NumLanes; ++s) { - for (unsigned i = Start, j = s * NumLaneElts; - i != End; + for (unsigned l = 0; l != NumLanes; ++l) { + for (unsigned i = l*NumLaneElts, j = l*NumLaneElts; + i != (l+1)*NumLaneElts; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -3504,25 +3474,16 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, return false; } } - // Process the next 128 bits. - Start += NumLaneElts; - End += NumLaneElts; } return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat); -} - /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. 
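The same per-lane discipline shows up in the UNPCK checks here: 256-bit VUNPCKLPS interleaves within each 128-bit lane rather than across the whole vector, so the loops above iterate a lane at a time. Illustrative intrinsics (assumes AVX):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m256 A = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
  __m256 B = _mm256_setr_ps(10, 11, 12, 13, 14, 15, 16, 17);
  float Out[8];
  _mm256_storeu_ps(Out, _mm256_unpacklo_ps(A, B));
  for (int I = 0; I != 8; ++I) printf("%g ", Out[I]);
  // prints: 0 10 1 11 4 14 5 15 -- lane-local, not a full interleave
}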
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, +static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2, bool V2IsSplat = false) { - int NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); @@ -3536,11 +3497,9 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, unsigned NumLanes = VT.getSizeInBits()/128; unsigned NumLaneElts = NumElts/NumLanes; - unsigned Start = 0; - unsigned End = NumLaneElts; for (unsigned l = 0; l != NumLanes; ++l) { - for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2; - i != End; i += 2, ++j) { + for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2; + i != (l+1)*NumLaneElts; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; if (!isUndefOrEqual(BitI, j)) @@ -3553,42 +3512,39 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, return false; } } - // Process the next 128 bits. - Start += NumLaneElts; - End += NumLaneElts; } return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat); -} - /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, /// <0, 0, 1, 1> -static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); - if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) +static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, + bool HasAVX2) { + unsigned NumElts = VT.getVectorNumElements(); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern // FIXME: Need a better way to get rid of this, there's no latency difference // between UNPCKLPD and MOVDDUP, the latter should always be checked first and // the former later. We should also remove the "_undef" special mask. - if (NumElems == 4 && VT.getSizeInBits() == 256) + if (NumElts == 4 && VT.getSizeInBits() == 256) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate // independently on 128-bit lanes. - unsigned NumLanes = VT.getSizeInBits() / 128; - unsigned NumLaneElts = NumElems / NumLanes; + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; - for (unsigned s = 0; s < NumLanes; ++s) { - for (unsigned i = s * NumLaneElts, j = s * NumLaneElts; - i != NumLaneElts * (s + 1); + for (unsigned l = 0; l != NumLanes; ++l) { + for (unsigned i = l*NumLaneElts, j = l*NumLaneElts; + i != (l+1)*NumLaneElts; i += 2, ++j) { int BitI = Mask[i]; int BitI1 = Mask[i+1]; @@ -3603,81 +3559,77 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { return true; }
vector_shuffle v, undef, /// <2, 2, 3, 3> -static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); - if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) +static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) { + unsigned NumElts = VT.getVectorNumElements(); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for unpckh"); + + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; - for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (!isUndefOrEqual(BitI1, j)) - return false; + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; + + for (unsigned l = 0; l != NumLanes; ++l) { + for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2; + i != (l+1)*NumLaneElts; i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } } return true; } -bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0)); -} - /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. -static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) { +static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; + if (VT.getSizeInBits() == 256) + return false; - int NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); if (!isUndefOrEqual(Mask[0], NumElts)) return false; - for (int i = 1; i < NumElts; ++i) + for (unsigned i = 1; i != NumElts; ++i) if (!isUndefOrEqual(Mask[i], i)) return false; return true; } -bool X86::isMOVLMask(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return ::isMOVLMask(M, N->getValueType(0)); -} - /// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered /// as permutations between 128-bit chunks or halves. As an example: this /// shuffle below: /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> /// The first half comes from the second half of V1 and the second half from /// the second half of V2. -static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT, - bool HasAVX) { +static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { if (!HasAVX || VT.getSizeInBits() != 256) return false; // The shuffle result is divided into half A and half B. In total the two // sources have 4 halves, namely: C, D, E, F. The final values of A and // B must come from C, D, E or F. - int HalfSize = VT.getVectorNumElements()/2; + unsigned HalfSize = VT.getVectorNumElements()/2; bool MatchA = false, MatchB = false; // Check if A comes from one of C, D, E, F.
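The example mask in the doc comment above, <4, 5, 6, 7, 12, 13, 14, 15>, is VPERM2F128 with immediate 0x31: each 2-bit selector field picks one of the four source halves, for the low and high half of the result respectively. Illustrative intrinsics (assumes AVX):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m256 V1 = _mm256_setr_ps(0, 1, 2, 3, 4, 5, 6, 7);
  __m256 V2 = _mm256_setr_ps(10, 11, 12, 13, 14, 15, 16, 17);
  // 0x31: low half <- half 1 (high of V1), high half <- half 3 (high of V2),
  // i.e. exactly the shuffle mask <4,5,6,7,12,13,14,15> quoted above.
  float Out[8];
  _mm256_storeu_ps(Out, _mm256_permute2f128_ps(V1, V2, 0x31));
  for (int I = 0; I != 8; ++I) printf("%g ", Out[I]);
  // prints: 4 5 6 7 14 15 16 17
}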
- for (int Half = 0; Half < 4; ++Half) { + for (unsigned Half = 0; Half != 4; ++Half) { if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) { MatchA = true; break; @@ -3685,7 +3637,7 @@ static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT, } // Check if B comes from one of C, D, E, F. - for (int Half = 0; Half < 4; ++Half) { + for (unsigned Half = 0; Half != 4; ++Half) { if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) { MatchB = true; break; @@ -3700,16 +3652,16 @@ static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT, static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); - int HalfSize = VT.getVectorNumElements()/2; + unsigned HalfSize = VT.getVectorNumElements()/2; - int FstHalf = 0, SndHalf = 0; - for (int i = 0; i < HalfSize; ++i) { + unsigned FstHalf = 0, SndHalf = 0; + for (unsigned i = 0; i < HalfSize; ++i) { if (SVOp->getMaskElt(i) > 0) { FstHalf = SVOp->getMaskElt(i)/HalfSize; break; } } - for (int i = HalfSize; i < HalfSize*2; ++i) { + for (unsigned i = HalfSize; i < HalfSize*2; ++i) { if (SVOp->getMaskElt(i) > 0) { SndHalf = SVOp->getMaskElt(i)/HalfSize; break; @@ -3725,31 +3677,28 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { /// type is 32 or 64. In the VPERMILPS the high half of the mask should point /// to the same elements of the low, but to the higher half of the source. /// In VPERMILPD the two lanes could be shuffled independently of each other -/// with the same restriction that lanes can't be crossed. -static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool HasAVX) { - int NumElts = VT.getVectorNumElements(); - int NumLanes = VT.getSizeInBits()/128; - +/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY. +static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { if (!HasAVX) return false; + unsigned NumElts = VT.getVectorNumElements(); // Only match 256-bit with 32/64-bit types if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) return false; - int LaneSize = NumElts/NumLanes; - for (int l = 0; l != NumLanes; ++l) { - int LaneStart = l*LaneSize; - for (int i = 0; i != LaneSize; ++i) { - if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize)) + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned LaneSize = NumElts/NumLanes; + for (unsigned l = 0; l != NumElts; l += LaneSize) { + for (unsigned i = 0; i != LaneSize; ++i) { + if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize)) return false; - if (NumElts == 4 || l == 0) + if (NumElts != 8 || l == 0) continue; // VPERMILPS handling if (Mask[i] < 0) continue; - if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize)) + if (!isUndefOrEqual(Mask[i+l], Mask[i]+l)) return false; } } @@ -3757,48 +3706,19 @@ static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions. -static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); - - int NumElts = VT.getVectorNumElements(); - int NumLanes = VT.getSizeInBits()/128; - int LaneSize = NumElts/NumLanes; - - // Although the mask is equal for both lanes do it twice to get the cases - // where a mask will match because the same mask element is undef on the - // first half but valid on the second. 
This would get pathological cases - // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid. - unsigned Shift = (LaneSize == 4) ? 2 : 1; - unsigned Mask = 0; - for (int i = 0; i != NumElts; ++i) { - int MaskElt = SVOp->getMaskElt(i); - if (MaskElt < 0) - continue; - MaskElt %= LaneSize; - unsigned Shamt = i; - // VPERMILPSY, the mask of the first half must be equal to the second one - if (NumElts == 8) Shamt %= LaneSize; - Mask |= MaskElt << (Shamt*Shift); - } - - return Mask; -} - -/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse +/// isCommutedMOVLMask - Returns true if the shuffle mask is except the reverse /// of what x86 movss want. X86 movs requires the lowest element to be lowest /// element of vector 2 and the other elements to come from vector 1 in order. -static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT, +static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { - int NumOps = VT.getVectorNumElements(); + unsigned NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; if (!isUndefOrEqual(Mask[0], 0)) return false; - for (int i = 1; i < NumOps; ++i) + for (unsigned i = 1; i != NumOps; ++i) if (!(isUndefOrEqual(Mask[i], i+NumOps) || (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) || (V2IsSplat && isUndefOrEqual(Mask[i], NumOps)))) @@ -3807,26 +3727,14 @@ static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, - bool V2IsUndef = false) { - SmallVector<int, 8> M; - N->getMask(M); - return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef); -} - /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> -bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, - const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3orAVX()) - return false; - - // The second vector must be undef - if (N->getOperand(1).getOpcode() != ISD::UNDEF) +static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3()) return false; - EVT VT = N->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); if ((VT.getSizeInBits() == 128 && NumElems != 4) || @@ -3834,9 +3742,9 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, return false; // "i+1" is the value the indexed mask element must have - for (unsigned i = 0; i < NumElems; i += 2) - if (!isUndefOrEqual(N->getMaskElt(i), i+1) || - !isUndefOrEqual(N->getMaskElt(i+1), i+1)) + for (unsigned i = 0; i != NumElems; i += 2) + if (!isUndefOrEqual(Mask[i], i+1) || + !isUndefOrEqual(Mask[i+1], i+1)) return false; return true; @@ -3845,16 +3753,11 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. 
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> -bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, - const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3orAVX()) - return false; - - // The second vector must be undef - if (N->getOperand(1).getOpcode() != ISD::UNDEF) +static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, + const X86Subtarget *Subtarget) { + if (!Subtarget->hasSSE3()) return false; - EVT VT = N->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); if ((VT.getSizeInBits() == 128 && NumElems != 4) || @@ -3862,9 +3765,9 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, return false; // "i" is the value the indexed mask element must have - for (unsigned i = 0; i < NumElems; i += 2) - if (!isUndefOrEqual(N->getMaskElt(i), i) || - !isUndefOrEqual(N->getMaskElt(i+1), i)) + for (unsigned i = 0; i != NumElems; i += 2) + if (!isUndefOrEqual(Mask[i], i) || + !isUndefOrEqual(Mask[i+1], i)) return false; return true; @@ -3873,17 +3776,16 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. -static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool HasAVX) { - int NumElts = VT.getVectorNumElements(); +static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { + unsigned NumElts = VT.getVectorNumElements(); if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) return false; - for (int i = 0; i != NumElts/2; ++i) + for (unsigned i = 0; i != NumElts/2; ++i) if (!isUndefOrEqual(Mask[i], 0)) return false; - for (int i = NumElts/2; i != NumElts; ++i) + for (unsigned i = NumElts/2; i != NumElts; ++i) if (!isUndefOrEqual(Mask[i], NumElts/2)) return false; return true; @@ -3892,18 +3794,16 @@ static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT, /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 128-bit /// version of MOVDDUP. -bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); - +static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) { if (VT.getSizeInBits() != 128) return false; - int e = VT.getVectorNumElements() / 2; - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), i)) + unsigned e = VT.getVectorNumElements() / 2; + for (unsigned i = 0; i != e; ++i) + if (!isUndefOrEqual(Mask[i], i)) return false; - for (int i = 0; i < e; ++i) - if (!isUndefOrEqual(N->getMaskElt(e+i), i)) + for (unsigned i = 0; i != e; ++i) + if (!isUndefOrEqual(Mask[e+i], i)) return false; return true; } @@ -3948,31 +3848,43 @@ bool X86::isVINSERTF128Index(SDNode *N) { /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions. -unsigned X86::getShuffleSHUFImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - int NumOperands = SVOp->getValueType(0).getVectorNumElements(); +/// Handles 128-bit and 256-bit. +static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for PSHUF/SHUFP"); - unsigned Shift = (NumOperands == 4) ? 2 : 1; + // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate + // independently on 128-bit lanes. 
+ unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; + + assert((NumLaneElts == 2 || NumLaneElts == 4) && + "Only supports 2 or 4 elements per lane"); + + unsigned Shift = (NumLaneElts == 4) ? 1 : 0; unsigned Mask = 0; - for (int i = 0; i < NumOperands; ++i) { - int Val = SVOp->getMaskElt(NumOperands-i-1); - if (Val < 0) Val = 0; - if (Val >= NumOperands) Val -= NumOperands; - Mask |= Val; - if (i != NumOperands - 1) - Mask <<= Shift; + for (unsigned i = 0; i != NumElts; ++i) { + int Elt = N->getMaskElt(i); + if (Elt < 0) continue; + Elt %= NumLaneElts; + unsigned ShAmt = i << Shift; + if (ShAmt >= 8) ShAmt -= 8; + Mask |= Elt << ShAmt; } + return Mask; } /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. -unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { unsigned Mask = 0; // 8 nodes, but we only care about the last 4. for (unsigned i = 7; i >= 4; --i) { - int Val = SVOp->getMaskElt(i); + int Val = N->getMaskElt(i); if (Val >= 0) Mask |= (Val - 4); if (i != 4) @@ -3983,12 +3895,11 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) { /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. -unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { unsigned Mask = 0; // 8 nodes, but we only care about the first 4. for (int i = 3; i >= 0; --i) { - int Val = SVOp->getMaskElt(i); + int Val = N->getMaskElt(i); if (Val >= 0) Mask |= Val; if (i != 0) @@ -4002,14 +3913,21 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; - int Val = 0; - unsigned i, e; - for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts/NumLanes; + + int Val = 0; + unsigned i; + for (i = 0; i != NumElts; ++i) { Val = SVOp->getMaskElt(i); if (Val >= 0) break; } + if (Val >= (int)NumElts) + Val -= NumElts - NumLaneElts; + assert(Val - i > 0 && "PALIGNR imm should be positive"); return (Val - i) * EltSize; } @@ -4082,17 +4000,16 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). -static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) { - EVT VT = Op->getValueType(0); +static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) { if (VT.getSizeInBits() != 128) return false; if (VT.getVectorNumElements() != 4) return false; for (unsigned i = 0, e = 2; i != e; ++i) - if (!isUndefOrEqual(Op->getMaskElt(i), i+2)) + if (!isUndefOrEqual(Mask[i], i+2)) return false; for (unsigned i = 2; i != 4; ++i) - if (!isUndefOrEqual(Op->getMaskElt(i), i+4)) + if (!isUndefOrEqual(Mask[i], i+4)) return false; return true; } @@ -4140,8 +4057,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) { /// half of V2 (and in order). 
And since V1 will become the source of the /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, - ShuffleVectorSDNode *Op) { - EVT VT = Op->getValueType(0); + ArrayRef<int> Mask, EVT VT) { if (VT.getSizeInBits() != 128) return false; @@ -4157,10 +4073,10 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) - if (!isUndefOrEqual(Op->getMaskElt(i), i)) + if (!isUndefOrEqual(Mask[i], i)) return false; for (unsigned i = NumElems/2; i != NumElems; ++i) - if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems)) + if (!isUndefOrEqual(Mask[i], i+NumElems)) return false; return true; } @@ -4208,15 +4124,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, - DebugLoc dl) { +static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, + SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; if (VT.getSizeInBits() == 128) { // SSE - if (HasXMMInt) { // SSE2 + if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 @@ -4224,12 +4140,17 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } } else if (VT.getSizeInBits() == 256) { // AVX - // 256-bit logic and arithmetic instructions in AVX are - // all floating-point, no support for integer ops. Default - // to emitting fp zeroed vectors then. - SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); + if (Subtarget->hasAVX2()) { // AVX2 + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + } else { + // 256-bit logic and arithmetic instructions in AVX are all + // floating-point, no support for integer ops. Emit fp zeroed vectors. + SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); + } } return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -4266,24 +4187,12 @@ static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG, /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 point to its first element.
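Backing up to getShuffleSHUFImmediate above, a worked example of the encoding for the 4-element case: each mask element is reduced modulo the lane size and packed two bits per element, element i landing at bit 2*i. Standalone sketch (plain C++, hypothetical helper, not LLVM code):

#include <cassert>

static unsigned ShufImmediate4(const int (&Mask)[4]) {
  unsigned Imm = 0;
  for (unsigned I = 0; I != 4; ++I)
    if (Mask[I] >= 0)
      Imm |= unsigned(Mask[I] % 4) << (2 * I);  // two bits per element
  return Imm;
}

int main() {
  int Reversed[4] = {3, 2, 1, 0};  // full reverse -> 0b00011011
  assert(ShufImmediate4(Reversed) == 0x1B);
  int MovHL[4] = {2, 3, 2, 3};     // the MOVHLPS-style pattern -> 0xEE
  assert(ShufImmediate4(MovHL) == 0xEE);
}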
-static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - EVT VT = SVOp->getValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - - bool Changed = false; - SmallVector<int, 8> MaskVec; - SVOp->getMask(MaskVec); - +static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) { for (unsigned i = 0; i != NumElems; ++i) { - if (MaskVec[i] > (int)NumElems) { - MaskVec[i] = NumElems; - Changed = true; + if (Mask[i] > (int)NumElems) { + Mask[i] = NumElems; } } - if (Changed) - return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0), - SVOp->getOperand(1), &MaskVec[0]); - return SDValue(SVOp, 0); } /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd @@ -4387,7 +4296,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Extract the 128-bit part containing the splat element and update // the splat element index when it refers to the higher register. if (Size == 256) { - unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0; + unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0; V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl); if (Idx > 0) EltNo -= NumElems/2; @@ -4419,11 +4328,12 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { /// element of V2 is swizzled into the zero/undef vector, landing at element /// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, - bool isZero, bool HasXMMInt, + bool IsZero, + const X86Subtarget *Subtarget, SelectionDAG &DAG) { EVT VT = V2.getValueType(); - SDValue V1 = isZero - ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); + SDValue V1 = IsZero + ? getZeroVector(VT, Subtarget, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); SmallVector<int, 16> MaskVec; for (unsigned i = 0; i != NumElems; ++i) @@ -4450,20 +4360,20 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, if (Index < 0) return DAG.getUNDEF(VT.getVectorElementType()); - int NumElems = VT.getVectorNumElements(); - SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1); + unsigned NumElems = VT.getVectorNumElements(); + SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0) + : SV->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1); } // Recurse into target specific vector shuffles to find scalars. 
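On the getZeroVector split above: plain AVX has no 256-bit integer xor, so an all-zero integer vector must be materialized in the floating-point domain and bitcast, while AVX2 can stay in the integer domain. The intrinsic-level analogue (illustrative; a compiler may fold both to the same instruction):

#include <immintrin.h>

__m256i Zero256AvxOnly() {  // AVX: fp-domain vxorps, then reinterpret
  return _mm256_castps_si256(_mm256_setzero_ps());
}

__m256i Zero256Avx2() {     // AVX2: an integer-domain zero idiom is legal
  return _mm256_setzero_si256();
}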
if (isTargetShuffle(Opcode)) { - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); SmallVector<unsigned, 16> ShuffleMask; SDValue ImmN; switch(Opcode) { - case X86ISD::SHUFPS: - case X86ISD::SHUFPD: + case X86ISD::SHUFP: ImmN = N->getOperand(N->getNumOperands()-1); DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); @@ -4481,9 +4391,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, DecodeMOVLHPSMask(NumElems, ShuffleMask); break; case X86ISD::PSHUFD: + case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFMask(NumElems, - cast<ConstantSDNode>(ImmN)->getZExtValue(), + DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; case X86ISD::PSHUFHW: @@ -4505,14 +4415,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, Depth+1); } - case X86ISD::VPERMILP: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; case X86ISD::MOVDDUP: @@ -4523,16 +4428,15 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::MOVSLDUP: case X86ISD::PALIGN: return SDValue(); // Not yet implemented. - default: - assert(0 && "unknown target shuffle node"); - return SDValue(); + default: llvm_unreachable("unknown target shuffle node"); } Index = ShuffleMask[Index]; if (Index < 0) return DAG.getUNDEF(VT.getVectorElementType()); - SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1); + SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0) + : N->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1); } @@ -4693,6 +4597,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, + const X86Subtarget* Subtarget, const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); @@ -4704,7 +4609,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { if (NumZero) - V = getZeroVector(MVT::v8i16, true, DAG, dl); + V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v8i16); First = false; @@ -4740,6 +4645,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, + const X86Subtarget* Subtarget, const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); @@ -4752,7 +4658,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, if (isNonZero) { if (First) { if (NumZero) - V = getZeroVector(MVT::v8i16, true, DAG, dl); + V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); else V = DAG.getUNDEF(MVT::v8i16); First = false; @@ -4773,7 +4679,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, const TargetLowering &TLI, DebugLoc dl) { assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; - unsigned Opc = isLeft ? 
X86ISD::VSHL : X86ISD::VSRL; + unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, @@ -4841,21 +4747,16 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int NumElems = VT.getVectorNumElements(); - EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32; EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(StartOffset), false, false, false, 0); - // Canonicalize it to a v4i32 or v8i32 shuffle. SmallVector<int, 8> Mask; for (int i = 0; i < NumElems; ++i) Mask.push_back(EltNo); - V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1); - return DAG.getNode(ISD::BITCAST, dl, NVT, - DAG.getVectorShuffle(CanonVT, dl, V1, - DAG.getUNDEF(CanonVT),&Mask[0])); + return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]); } return SDValue(); @@ -4938,7 +4839,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, /// a scalar load. /// The scalar load node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) { +static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { + if (!Subtarget->hasAVX()) + return SDValue(); + EVT VT = Op.getValueType(); SDValue V = Op; @@ -4993,22 +4897,14 @@ static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) { if (!ISD::isNormalLoad(Ld.getNode())) return SDValue(); + // Reject loads that have uses of the chain result + if (Ld->hasAnyUseOfValue(1)) + return SDValue(); + bool Is256 = VT.getSizeInBits() == 256; bool Is128 = VT.getSizeInBits() == 128; unsigned ScalarSize = Ld.getValueType().getSizeInBits(); - if (hasAVX2) { - // VBroadcast to YMM - if (Is256 && (ScalarSize == 8 || ScalarSize == 16 || - ScalarSize == 32 || ScalarSize == 64 )) - return Ld; - - // VBroadcast to XMM - if (Is128 && (ScalarSize == 8 || ScalarSize == 32 || - ScalarSize == 16 || ScalarSize == 64 )) - return Ld; - } - // VBroadcast to YMM if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) return Ld; @@ -5017,6 +4913,17 @@ static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) { if (Is128 && (ScalarSize == 32)) return Ld; + // The integer check is needed for the 64-bit into 128-bit so it doesn't match + // double since there is vbroadcastsd xmm + if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) { + // VBroadcast to YMM + if (Is256 && (ScalarSize == 8 || ScalarSize == 16)) + return Ld; + + // VBroadcast to XMM + if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) + return Ld; + } // Unsupported broadcast. return SDValue(); @@ -5034,27 +4941,25 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(Op.getNode())) { // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || - Op.getValueType() == MVT::v8i32) + if (VT == MVT::v4i32 || VT == MVT::v8i32) return Op; - return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl); + return getZeroVector(VT, Subtarget, DAG, dl); } // Vectors containing all ones can be matched by pcmpeqd on 128-bit width // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use // vpcmpeqd on 256-bit vectors. 
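[Editor's note on the isVectorBroadcast changes above: the reshuffled size checks encode which scalar widths each ISA level can splat from memory. Plain AVX only has vbroadcastss/vbroadcastsd, and vbroadcastsd has no xmm form, which is what the "integer check" comment is getting at; AVX2's vpbroadcast instructions fill in the integer cases. A hedged sketch of that gate, with illustrative parameter names:]

// Returns true if a loaded scalar of ScalarBits bits can be broadcast into
// a vector of VecBits bits, following the size table in the hunk above.
static bool broadcastSupported(unsigned VecBits, unsigned ScalarBits,
                               bool ScalarIsInt, bool HasAVX2) {
  bool Is256 = VecBits == 256, Is128 = VecBits == 128;
  // vbroadcastss/vbroadcastsd (AVX): 32-bit to xmm or ymm, 64-bit only to ymm.
  if (Is256 && (ScalarBits == 32 || ScalarBits == 64)) return true;
  if (Is128 && ScalarBits == 32) return true;
  // vpbroadcastb/w/q (AVX2) cover the remaining integer cases.
  if (HasAVX2 && ScalarIsInt) {
    if (Is256 && (ScalarBits == 8 || ScalarBits == 16)) return true;
    if (Is128 && (ScalarBits == 8 || ScalarBits == 16 || ScalarBits == 64))
      return true;
  }
  return false;
}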
if (ISD::isBuildVectorAllOnes(Op.getNode())) { - if (Op.getValueType() == MVT::v4i32 || - (Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2())) + if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2())) return Op; - return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl); + return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl); } - SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); - if (Subtarget->hasAVX() && LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue LD = isVectorBroadcast(Op, Subtarget); + if (LD.getNode()) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5105,8 +5010,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // convert it to a vector with movd (S2V+shuffle to zero extend). Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasXMMInt(), DAG); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); // Now we have our 32-bit value zero extended in the low element of // a vector. If Idx != 0, swizzle it into place. @@ -5119,7 +5023,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG.getUNDEF(Item.getValueType()), &Mask[0]); } - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item); + return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -5128,23 +5032,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // the rest of the elements. This will be matched as movd/movq/movss/movsd // depending on what the source datatype is. if (Idx == 0) { - if (NumZero == 0) { + if (NumZero == 0) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || + + if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { + if (VT.getSizeInBits() == 256) { + SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, + Item, DAG.getIntPtrConstant(0)); + } + assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. - return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(), - DAG); - } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { + return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } + + if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - unsigned NumBits = VT.getSizeInBits(); - assert((NumBits == 128 || NumBits == 256) && - "Expected an SSE or AVX value type!"); - EVT MiddleVT = NumBits == 128 ? 
MVT::v4i32 : MVT::v8i32; - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); - Item = getShuffleVectorZeroOrUndef(Item, 0, true, - Subtarget->hasXMMInt(), DAG); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + if (VT.getSizeInBits() == 256) { + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32), + DAG, dl); + } else { + assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } return DAG.getNode(ISD::BITCAST, dl, VT, Item); } } @@ -5172,8 +5086,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a shuffle of zero and zero-extended scalar to vector. - Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasXMMInt(), DAG); + Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG); SmallVector<int, 8> MaskVec; for (unsigned i = 0; i < NumElems; i++) MaskVec.push_back(i == Idx ? 0 : 1); @@ -5203,9 +5116,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) { + if (VT.getSizeInBits() == 256) { SmallVector<SDValue, 32> V; - for (unsigned i = 0; i < NumElems; ++i) + for (unsigned i = 0; i != NumElems; ++i) V.push_back(Op.getOperand(i)); EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); @@ -5229,8 +5142,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned Idx = CountTrailingZeros_32(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(Idx)); - return getShuffleVectorZeroOrUndef(V2, Idx, true, - Subtarget->hasXMMInt(), DAG); + return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); } @@ -5238,24 +5150,23 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) { SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, - *this); + Subtarget, *this); if (V.getNode()) return V; } if (EVTBits == 16 && NumElems == 8) { SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - *this); + Subtarget, *this); if (V.getNode()) return V; } // If element VT is == 32 bits, turn it into a number of shuffles. - SmallVector<SDValue, 8> V; - V.resize(NumElems); + SmallVector<SDValue, 8> V(NumElems); if (NumElems == 4 && NumZero > 0) { for (unsigned i = 0; i < 4; ++i) { bool isZero = !(NonZeros & (1 << i)); if (isZero) - V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl); + V[i] = getZeroVector(VT, Subtarget, DAG, dl); else V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); } @@ -5278,13 +5189,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } } - SmallVector<int, 8> MaskVec; - bool Reverse = (NonZeros & 0x3) == 2; - for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i : i); - Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2; - for (unsigned i = 0; i < 2; ++i) - MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems); + bool Reverse1 = (NonZeros & 0x3) == 2; + bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2; + int MaskVec[] = { + Reverse1 ? 
1 : 0, + Reverse1 ? 0 : 1, + static_cast<int>(Reverse2 ? NumElems+1 : NumElems), + static_cast<int>(Reverse2 ? NumElems : NumElems+1) + }; return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } @@ -5299,7 +5211,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return LD; // For SSE 4.1, use insertps to put the high elements into the low element. - if (getSubtarget()->hasSSE41orAVX()) { + if (getSubtarget()->hasSSE41()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -5430,7 +5342,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // mask values count as coming from any quadword, for better codegen. unsigned LoQuad[] = { 0, 0, 0, 0 }; unsigned HiQuad[] = { 0, 0, 0, 0 }; - BitVector InputQuads(4); + std::bitset<4> InputQuads; for (unsigned i = 0; i < 8; ++i) { unsigned *Quad = i < 4 ? LoQuad : HiQuad; int EltIdx = SVOp->getMaskElt(i); @@ -5470,10 +5382,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // quads, disable the next transformation since it does not help SSSE3. bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (Subtarget->hasSSSE3orAVX()) { + if (Subtarget->hasSSSE3()) { if (InputQuads.count() == 2 && V1Used && V2Used) { - BestLoQuad = InputQuads.find_first(); - BestHiQuad = InputQuads.find_next(BestLoQuad); + BestLoQuad = InputQuads[0] ? 0 : 1; + BestHiQuad = InputQuads[2] ? 2 : 3; } if (InputQuads.count() > 2) { BestLoQuad = -1; @@ -5486,9 +5398,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // words from all 4 input quadwords. SDValue NewV; if (BestLoQuad >= 0 || BestHiQuad >= 0) { - SmallVector<int, 8> MaskV; - MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); - MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); + int MaskV[] = { + BestLoQuad < 0 ? 0 : BestLoQuad, + BestHiQuad < 0 ? 1 : BestHiQuad + }; NewV = DAG.getVectorShuffle(MVT::v2i64, dl, DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1), DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]); @@ -5533,8 +5446,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, unsigned TargetMask = 0; NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); - TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()): - X86::getShufflePSHUFLWImmediate(NewV.getNode()); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode()); + TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp): + getShufflePSHUFLWImmediate(SVOp); V1 = NewV.getOperand(0); return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG); } @@ -5543,7 +5457,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. - if (Subtarget->hasSSSE3orAVX()) { + if (Subtarget->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -5591,59 +5505,51 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order, // and update MaskVals with new element order. 
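[Editor's note on the pshuflw/pshufhw paths above: the 4-element word permutation is packed into an imm8, two bits per result word; that is the shape of the value getShufflePSHUFLWImmediate and getShufflePSHUFHWImmediate produce from the shuffle node. A small sketch of the packing, treating undef (negative) entries as 0:]

#include <cstdint>

// Pack a 4-entry word permutation into a pshuflw-style immediate: bits
// [2i+1:2i] select the source word for result word i.
static uint8_t packPSHUFImm(const int Mask[4]) {
  uint8_t Imm = 0;
  for (unsigned i = 0; i != 4; ++i) {
    int Elt = Mask[i] < 0 ? 0 : Mask[i];
    Imm |= uint8_t(Elt & 0x3) << (i * 2);
  }
  return Imm;
}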
- BitVector InOrder(8); + std::bitset<8> InOrder; if (BestLoQuad >= 0) { - SmallVector<int, 8> MaskV; + int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 }; for (int i = 0; i != 4; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestLoQuad) { - MaskV.push_back(idx & 3); + MaskV[i] = idx & 3; InOrder.set(i); - } else { - MaskV.push_back(-1); } } - for (unsigned i = 4; i != 8; ++i) - MaskV.push_back(i); NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode()); NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, - NewV.getOperand(0), - X86::getShufflePSHUFLWImmediate(NewV.getNode()), - DAG); + NewV.getOperand(0), + getShufflePSHUFLWImmediate(SVOp), DAG); + } } // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. if (BestHiQuad >= 0) { - SmallVector<int, 8> MaskV; - for (unsigned i = 0; i != 4; ++i) - MaskV.push_back(i); + int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 }; for (unsigned i = 4; i != 8; ++i) { int idx = MaskVals[i]; if (idx < 0) { - MaskV.push_back(-1); InOrder.set(i); } else if ((idx / 4) == BestHiQuad) { - MaskV.push_back((idx & 3) + 4); + MaskV[i] = (idx & 3) + 4; InOrder.set(i); - } else { - MaskV.push_back(-1); } } NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode()); NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, - NewV.getOperand(0), - X86::getShufflePSHUFHWImmediate(NewV.getNode()), - DAG); + NewV.getOperand(0), + getShufflePSHUFHWImmediate(SVOp), DAG); + } } // In case BestHi & BestLo were both -1, which means each quadword has a word @@ -5685,8 +5591,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - SmallVector<int, 16> MaskVals; - SVOp->getMask(MaskVals); + ArrayRef<int> MaskVals = SVOp->getMask(); // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is @@ -5705,7 +5610,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. - if (TLI.getSubtarget()->hasSSSE3orAVX()) { + if (TLI.getSubtarget()->hasSSSE3()) { SmallVector<SDValue,16> pshufbMask; // If all result elements are from one input vector, then only translate @@ -5836,7 +5741,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned NewWidth = (NumElems == 4) ? 2 : 4; EVT NewVT; switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Unexpected!"); + default: llvm_unreachable("Unexpected!"); case MVT::v4f32: NewVT = MVT::v2f64; break; case MVT::v4i32: NewVT = MVT::v2i64; break; case MVT::v8i16: NewVT = MVT::v4i32; break; @@ -5902,96 +5807,106 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, OpVT, SrcOp))); } -/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector -/// shuffle node referes to only one lane in the sources. 
-static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) { - EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - int HalfSize = NumElems/2; - SmallVector<int, 16> M; - SVOp->getMask(M); - bool MatchA = false, MatchB = false; - - for (int l = 0; l < NumElems*2; l += HalfSize) { - if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) { - MatchA = true; - break; - } - } - - for (int l = 0; l < NumElems*2; l += HalfSize) { - if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) { - MatchB = true; - break; - } - } - - return MatchA && MatchB; -} - /// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vectors shuffles /// which could not be matched by any known target speficic shuffle static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - if (areShuffleHalvesWithinDisjointLanes(SVOp)) { - // If each half of a vector shuffle node referes to only one lane in the - // source vectors, extract each used 128-bit lane and shuffle them using - // 128-bit shuffles. Then, concatenate the results. Otherwise leave - // the work to the legalizer. - DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - int HalfSize = NumElems/2; - - // Extract the reference for each half - int FstVecExtractIdx = 0, SndVecExtractIdx = 0; - int FstVecOpNum = 0, SndVecOpNum = 0; - for (int i = 0; i < HalfSize; ++i) { - int Elt = SVOp->getMaskElt(i); - if (SVOp->getMaskElt(i) < 0) + EVT VT = SVOp->getValueType(0); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned NumLaneElems = NumElems / 2; + + int MinRange[2][2] = { { static_cast<int>(NumElems), + static_cast<int>(NumElems) }, + { static_cast<int>(NumElems), + static_cast<int>(NumElems) } }; + int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } }; + + // Collect used ranges for each source in each lane + for (unsigned l = 0; l < 2; ++l) { + unsigned LaneStart = l*NumLaneElems; + for (unsigned i = 0; i != NumLaneElems; ++i) { + int Idx = SVOp->getMaskElt(i+LaneStart); + if (Idx < 0) continue; - FstVecOpNum = Elt/NumElems; - FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize; - break; + + int Input = 0; + if (Idx >= (int)NumElems) { + Idx -= NumElems; + Input = 1; + } + + if (Idx > MaxRange[l][Input]) + MaxRange[l][Input] = Idx; + if (Idx < MinRange[l][Input]) + MinRange[l][Input] = Idx; } - for (int i = HalfSize; i < NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (SVOp->getMaskElt(i) < 0) + } + + // Make sure each range is 128-bits + int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } }; + for (unsigned l = 0; l < 2; ++l) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0) continue; - SndVecOpNum = Elt/NumElems; - SndVecExtractIdx = Elt % NumElems < HalfSize ? 
0 : HalfSize; - break; + + if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems) + ExtractIdx[l][Input] = 0; + else if (MinRange[l][Input] >= (int)NumLaneElems && + MaxRange[l][Input] < (int)NumElems) + ExtractIdx[l][Input] = NumLaneElems; + else + return SDValue(); } + } - // Extract the subvectors - SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum), - DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl); - SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum), - DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl); + DebugLoc dl = SVOp->getDebugLoc(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NVT = MVT::getVectorVT(EltVT, NumElems/2); + + SDValue Ops[2][2]; + for (unsigned l = 0; l < 2; ++l) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (ExtractIdx[l][Input] >= 0) + Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input), + DAG.getConstant(ExtractIdx[l][Input], MVT::i32), + DAG, dl); + else + Ops[l][Input] = DAG.getUNDEF(NVT); + } + } - // Generate 128-bit shuffles - SmallVector<int, 16> MaskV1, MaskV2; - for (int i = 0; i < HalfSize; ++i) { - int Elt = SVOp->getMaskElt(i); - MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize); + // Generate 128-bit shuffles + SmallVector<int, 16> Mask1, Mask2; + for (unsigned i = 0; i != NumLaneElems; ++i) { + int Elt = SVOp->getMaskElt(i); + if (Elt >= (int)NumElems) { + Elt %= NumLaneElems; + Elt += NumLaneElems; + } else if (Elt >= 0) { + Elt %= NumLaneElems; } - for (int i = HalfSize; i < NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize); + Mask1.push_back(Elt); + } + for (unsigned i = NumLaneElems; i != NumElems; ++i) { + int Elt = SVOp->getMaskElt(i); + if (Elt >= (int)NumElems) { + Elt %= NumLaneElems; + Elt += NumLaneElems; + } else if (Elt >= 0) { + Elt %= NumLaneElems; } - - EVT NVT = V1.getValueType(); - V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]); - V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]); - - // Concatenate the result back - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1, - DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); + Mask2.push_back(Elt); } - return SDValue(); + SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]); + SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]); + + // Concatenate the result back + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1, + DAG.getConstant(0, MVT::i32), DAG, dl); + return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); } /// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with @@ -6005,11 +5920,9 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); - SmallVector<std::pair<int, int>, 8> Locs; - Locs.resize(4); - SmallVector<int, 8> Mask1(4U, -1); - SmallVector<int, 8> PermMask; - SVOp->getMask(PermMask); + std::pair<int, int> Locs[4]; + int Mask1[] = { -1, -1, -1, -1 }; + SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end()); unsigned NumHi = 0; unsigned NumLo = 0; @@ -6039,17 +5952,14 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // vector operands, put the elements into the right order. 
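[Editor's note on the rewritten LowerVECTOR_SHUFFLE_256 above: it now works lane by lane, recording which 128-bit half of each source a lane uses, extracting those halves, and rebuilding two narrow shuffles that get concatenated back. The index remapping repeated for Mask1 and Mask2 reduces to one rule, sketched here with illustrative names:]

// Remap an element index of the wide shuffle (two sources of NumElems
// elements each) onto the narrow two-input shuffle built from the
// extracted 128-bit halves of NumLaneElems elements.
static int remapToLane(int Elt, unsigned NumElems, unsigned NumLaneElems) {
  if (Elt < 0)
    return Elt;                               // undef stays undef
  if (Elt >= (int)NumElems)                   // element drawn from V2
    return int(Elt % NumLaneElems + NumLaneElems);
  return int(Elt % NumLaneElems);             // element drawn from V1
}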
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - SmallVector<int, 8> Mask2(4U, -1); + int Mask2[] = { -1, -1, -1, -1 }; - for (unsigned i = 0; i != 4; ++i) { - if (Locs[i].first == -1) - continue; - else { + for (unsigned i = 0; i != 4; ++i) + if (Locs[i].first != -1) { unsigned Idx = (i < 2) ? 0 : 4; Idx += Locs[i].first * 2 + Locs[i].second; Mask2[i] = Idx; } - } return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); } else if (NumLo == 3 || NumHi == 3) { @@ -6102,18 +6012,16 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } // Break it into (shuffle shuffle_hi, shuffle_lo). - Locs.clear(); - Locs.resize(4); - SmallVector<int,8> LoMask(4U, -1); - SmallVector<int,8> HiMask(4U, -1); + int LoMask[] = { -1, -1, -1, -1 }; + int HiMask[] = { -1, -1, -1, -1 }; - SmallVector<int,8> *MaskPtr = &LoMask; + int *MaskPtr = LoMask; unsigned MaskIdx = 0; unsigned LoIdx = 0; unsigned HiIdx = 2; for (unsigned i = 0; i != 4; ++i) { if (i == 2) { - MaskPtr = &HiMask; + MaskPtr = HiMask; MaskIdx = 1; LoIdx = 0; HiIdx = 2; @@ -6123,26 +6031,21 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { Locs[i] = std::make_pair(-1, -1); } else if (Idx < 4) { Locs[i] = std::make_pair(MaskIdx, LoIdx); - (*MaskPtr)[LoIdx] = Idx; + MaskPtr[LoIdx] = Idx; LoIdx++; } else { Locs[i] = std::make_pair(MaskIdx, HiIdx); - (*MaskPtr)[HiIdx] = Idx; + MaskPtr[HiIdx] = Idx; HiIdx++; } } SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]); SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]); - SmallVector<int, 8> MaskOps; - for (unsigned i = 0; i != 4; ++i) { - if (Locs[i].first == -1) { - MaskOps.push_back(-1); - } else { - unsigned Idx = Locs[i].first * 4 + Locs[i].second; - MaskOps.push_back(Idx); - } - } + int MaskOps[] = { -1, -1, -1, -1 }; + for (unsigned i = 0; i != 4; ++i) + if (Locs[i].first != -1) + MaskOps[i] = Locs[i].first * 4 + Locs[i].second; return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]); } @@ -6220,35 +6123,41 @@ bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); + // If we are accessing the upper part of a YMM register + // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of + // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point + // because the legalization of N did not happen yet. + if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256) + return false; + // Skip one more bit_convert if necessary - if (V.getOpcode() == ISD::BITCAST) + if (V.getOpcode() == ISD::BITCAST) { + if (!V.hasOneUse()) + return false; V = V.getOperand(0); + } - if (ISD::isNormalLoad(V.getNode())) { - // Is the original load suitable? - LoadSDNode *LN0 = cast<LoadSDNode>(V); + if (!ISD::isNormalLoad(V.getNode())) + return false; - // FIXME: avoid the multi-use bug that is preventing lots of - // of foldings to be detected, this is still wrong of course, but - // give the temporary desired behavior, and if it happens that - // the load has real more uses, during isel it will not fold, and - // will generate poor code. - if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse() - return false; + // Is the original load suitable? 
+ LoadSDNode *LN0 = cast<LoadSDNode>(V); - if (!HasShuffleIntoBitcast) - return true; + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + return false; - // If there's a bitcast before the shuffle, check if the load type and - // alignment is valid. - unsigned Align = LN0->getAlignment(); - unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); + if (!HasShuffleIntoBitcast) + return true; - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) - return false; - } + // If there's a bitcast before the shuffle, check if the load type and + // alignment is valid. + unsigned Align = LN0->getAlignment(); + unsigned NewAlign = + TLI.getTargetData()->getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) + return false; return true; } @@ -6266,14 +6175,14 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { static SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, - bool HasXMMInt) { + bool HasSSE2) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); EVT VT = Op.getValueType(); assert(VT != MVT::v2i64 && "unsupported shuffle type"); - if (HasXMMInt && VT == MVT::v2f64) + if (HasSSE2 && VT == MVT::v2f64) return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG); // v4f32 or v4i32: canonizalized to v4f32 (which is legal for SSE1) @@ -6299,24 +6208,8 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) { return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG); } -static inline unsigned getSHUFPOpcode(EVT VT) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v8i32: // Use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4i32: // Use fp unit for int unpack. - case MVT::v4f32: return X86ISD::SHUFPS; - case MVT::v4i64: // Use fp unit for int unpack. - case MVT::v4f64: - case MVT::v2i64: // Use fp unit for int unpack. - case MVT::v2f64: return X86ISD::SHUFPD; - default: - llvm_unreachable("Unknown type for shufp*"); - } - return 0; -} - static -SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { +SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); EVT VT = Op.getValueType(); @@ -6342,7 +6235,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); if (CanFoldLoad) { - if (HasXMMInt && NumElems == 2) + if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) @@ -6357,10 +6250,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { // this is horrible, but will stay like this until we move all shuffle // matching to x86 specific nodes. Note that for the 1st condition all // types are matched with movsd. 
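[Editor's note on the restructured load checks above: when a bitcast sits between the shuffle and the load, folding is only sound if viewing the memory as the new vector type needs no more alignment than the original load guarantees, and a plain load of the new type is legal. A compressed sketch of that final test, with hypothetical inputs standing in for the TargetLowering and TargetData queries:]

// Decide whether a load viewed through a bitcast may be folded: the new
// type's ABI alignment must not exceed what the load provides, and the
// target must support a normal load of the new type.
static bool canFoldBitcastLoad(unsigned LoadAlign, unsigned NewABIAlign,
                               bool LoadOfNewTypeLegal) {
  return NewABIAlign <= LoadAlign && LoadOfNewTypeLegal;
}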
- if (HasXMMInt) { + if (HasSSE2) { // FIXME: isMOVLMask should be checked and matched before getMOVLP, // as to remove this logic from here, as much as possible - if (NumElems == 2 || !X86::isMOVLMask(SVOp)) + if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG); } @@ -6368,8 +6261,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { assert(VT != MVT::v4i32 && "unsupported shuffle type"); // Invert the operand order and use SHUFPS to match it. - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1, - X86::getShuffleSHUFImmediate(SVOp), DAG); + return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1, + getShuffleSHUFImmediate(SVOp), DAG); } static @@ -6383,7 +6276,7 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, SDValue V2 = Op.getOperand(1); if (isZeroShuffle(SVOp)) - return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl); + return getZeroVector(VT, Subtarget, DAG, dl); // Handle splat operations if (SVOp->isSplat()) { @@ -6397,8 +6290,8 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, return Op; // Use vbroadcast whenever the splat comes from a foldable load - SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); - if (Subtarget->hasAVX() && LD.getNode()) + SDValue LD = isVectorBroadcast(Op, Subtarget); + if (LD.getNode()) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); // Handle splats by matching through known shuffle masks @@ -6417,21 +6310,26 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); } else if ((VT == MVT::v4i32 || - (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) { + (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { // FIXME: Figure out a cleaner way to do this. // Try to make use of movq to zero out the top part. 
if (ISD::isBuildVectorAllZeros(V2.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) { - if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false)) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0), + EVT NewVT = NewOp.getValueType(); + if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), + NewVT, true, false)) + return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget, dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); - if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp))) - return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1), - DAG, Subtarget, dl); + if (NewOp.getNode()) { + EVT NewVT = NewOp.getValueType(); + if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT)) + return getVZextMovL(VT, NewVT, NewOp.getOperand(1), + DAG, Subtarget, dl); + } } } return SDValue(); @@ -6445,10 +6343,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); + bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; - bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasSSE2 = Subtarget->hasSSE2(); bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); @@ -6456,7 +6355,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); - assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef"); + if (V1IsUndef && V2IsUndef) + return DAG.getUNDEF(VT); + + assert(!V1IsUndef && "Op 1 of shuffle should not be undef"); // Vector shuffle lowering takes 3 steps: // @@ -6479,38 +6381,43 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (NewOp.getNode()) return NewOp; + SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end()); + // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. - if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) + if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); - if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) + if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); - if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && + if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); - if (X86::isMOVHLPS_v_undef_Mask(SVOp)) + if (isMOVHLPS_v_undef_Mask(M, VT)) return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && + if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); - if (X86::isPSHUFDMask(SVOp)) { + if (isPSHUFDMask(M, VT)) { // The actual implementation will match the mask in the if above and then // during isel it can match several different instructions, not only pshufd // as its name says, sad but true, emulate the behavior for now... 
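[Editor's note: both branches in the hunk that follows re-test the narrowed shuffle with mask-based predicates (isMOVLMask, isCommutedMOVLMask) instead of the old node-based X86:: helpers. For reference, a sketch of what a MOVL-style mask looks like, under the usual convention that indices below NumElems name V1 and the rest name V2, so {4,1,2,3} on a 4-element type is the movss/movsd pattern; names here are illustrative:]

#include <vector>

// True if element 0 of the result comes from V2's first element and all
// remaining elements come from V1 in order (undef entries allowed), i.e.
// the movs{s,d}/movq pattern.
static bool looksLikeMOVLMask(const std::vector<int> &Mask,
                              unsigned NumElems) {
  if (Mask.size() != NumElems || Mask[0] != (int)NumElems)
    return false;
  for (unsigned i = 1; i != NumElems; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)i)
      return false;
  return true;
}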
- if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64))) - return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG); + if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64))) + return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG); + + unsigned TargetMask = getShuffleSHUFImmediate(SVOp); - unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp); + if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64)) + return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG); - if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32)) + if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32)) return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG); - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1, + return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1, TargetMask, DAG); } @@ -6518,7 +6425,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = HasXMMInt && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -6527,11 +6434,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - if (X86::isMOVLMask(SVOp)) { + if (isMOVLMask(M, VT)) { if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!X86::isMOVLPMask(SVOp)) { - if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64)) + if (!isMOVLPMask(M, VT)) { + if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); if (VT == MVT::v4i32 || VT == MVT::v4f32) @@ -6540,27 +6447,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp, HasAVX2)) - return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); + if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); - if (X86::isMOVHLPSMask(SVOp)) + if (isMOVHLPSMask(M, VT)) return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp, Subtarget)) + if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp, Subtarget)) + if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget)) return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); - if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasXMMInt); + if (isMOVLPMask(M, VT)) + return getMOVLP(Op, dl, DAG, HasSSE2); - if (ShouldXformToMOVHLPS(SVOp) || - ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) + if (ShouldXformToMOVHLPS(M, VT) || + ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT)) return CommuteVectorShuffle(SVOp, DAG); if (isShift) { - // No better options. Use a vshl / vsrl. + // No better options. Use a vshldq / vsrldq. EVT EltVT = VT.getVectorElementType(); ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); @@ -6573,18 +6480,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { V2IsSplat = isSplatVector(V2.getNode()); // Canonicalize the splat or undef, if present, to be on the RHS. 
- if (V1IsSplat && !V2IsSplat) {
- Op = CommuteVectorShuffle(SVOp, DAG);
- SVOp = cast<ShuffleVectorSDNode>(Op);
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
+ if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
 std::swap(V1IsSplat, V2IsSplat);
 Commuted = true;
 }

- SmallVector<int, 32> M;
- SVOp->getMask(M);
-
 if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
 // Shuffling low element of v1 into undef, just return v1.
 if (V2IsUndef)
@@ -6604,41 +6506,40 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {

 if (V2IsSplat) {
 // Normalize mask so all entries that point to V2 points to its first
 // element then try to match unpck{h|l} again. If match, return a
- // new vector_shuffle with the corrected mask.
- SDValue NewMask = NormalizeMask(SVOp, DAG);
- ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
- if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) {
- return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) {
- return NewMask;
- }
+ // new vector_shuffle with the corrected mask.
+ SmallVector<int, 8> NewMask(M.begin(), M.end());
+ NormalizeMask(NewMask, NumElems);
+ if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+ } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
 }
 }

 if (Commuted) {
 // Commute is back and try unpck* again.
 // FIXME: this seems wrong.
- SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
- ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = false;

- if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);

- if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
 }

 // Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) ||
- isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
 return CommuteVectorShuffle(SVOp, DAG);

 // The checks below are all present in isShuffleMaskLegal, but they are
 // inlined here right now to enable us to directly emit target specific
 // nodes, and remove one by one until they don't return Op anymore. 
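[Editor's note: CommuteVectorShuffleMask, used twice in the hunk above in place of rebuilding a node, just reflects each defined index across the V1/V2 boundary; the operands and any cached per-operand flags are swapped separately, as the hunk does with std::swap. A minimal sketch:]

#include <vector>

// Swap the roles of V1 and V2 in a shuffle mask: indices in [0, NumElems)
// move up by NumElems and vice versa; undef (negative) entries are kept.
static void commuteShuffleMask(std::vector<int> &Mask, unsigned NumElems) {
  for (int &M : Mask) {
    if (M < 0)
      continue;
    M = M < (int)NumElems ? M + (int)NumElems : M - (int)NumElems;
  }
}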
- if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) + if (isPALIGNRMask(M, VT, Subtarget)) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, getShufflePALIGNRImmediate(SVOp), DAG); @@ -6651,21 +6552,21 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (isPSHUFHWMask(M, VT)) return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, - X86::getShufflePSHUFHWImmediate(SVOp), + getShufflePSHUFHWImmediate(SVOp), DAG); if (isPSHUFLWMask(M, VT)) return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1, - X86::getShufflePSHUFLWImmediate(SVOp), + getShufflePSHUFLWImmediate(SVOp), DAG); - if (isSHUFPMask(M, VT)) - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - X86::getShuffleSHUFImmediate(SVOp), DAG); + if (isSHUFPMask(M, VT, HasAVX)) + return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2, + getShuffleSHUFImmediate(SVOp), DAG); - if (isUNPCKL_v_undef_Mask(M, VT)) + if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); - if (isUNPCKH_v_undef_Mask(M, VT)) + if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); //===--------------------------------------------------------------------===// @@ -6678,20 +6579,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); // Handle VPERMILPS/D* permutations - if (isVPERMILPMask(M, VT, HasAVX)) + if (isVPERMILPMask(M, VT, HasAVX)) { + if (HasAVX2 && VT == MVT::v8i32) + return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, + getShuffleSHUFImmediate(SVOp), DAG); return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, - getShuffleVPERMILPImmediate(SVOp), DAG); + getShuffleSHUFImmediate(SVOp), DAG); + } // Handle VPERM2F128/VPERM2I128 permutations if (isVPERM2X128Mask(M, VT, HasAVX)) return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - // Handle VSHUFPS/DY permutations - if (isVSHUFPYMask(M, VT, HasAVX)) - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - getShuffleVSHUFPYImmediate(SVOp), DAG); - //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -6810,7 +6710,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41orAVX()) { + if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -6952,7 +6852,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41orAVX()) + if (Subtarget->hasSSE41()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -7408,19 +7308,77 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned Reg = Subtarget->is64Bit() ? 
X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), Chain.getValue(1)); - } + } else if (Subtarget->isTargetWindows()) { + // Just use the implicit TLS architecture + // Need to generate someting similar to: + // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage + // ; from TEB + // mov ecx, dword [rel _tls_index]: Load index (from C runtime) + // mov rcx, qword [rdx+rcx*8] + // mov eax, .tls$:tlsvar + // [rax+rcx] contains the address + // Windows 64bit: gs:0x58 + // Windows 32bit: fs:__tls_array - assert(false && - "TLS not implemented for this target."); + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + DebugLoc dl = GA->getDebugLoc(); + SDValue Chain = DAG.getEntryNode(); - llvm_unreachable("Unreachable"); - return SDValue(); + // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or + // %gs:0x58 (64-bit). + Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() + ? Type::getInt8PtrTy(*DAG.getContext(), + 256) + : Type::getInt32PtrTy(*DAG.getContext(), + 257)); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, + Subtarget->is64Bit() + ? DAG.getIntPtrConstant(0x58) + : DAG.getExternalSymbol("_tls_array", + getPointerTy()), + MachinePointerInfo(Ptr), + false, false, false, 0); + + // Load the _tls_index variable + SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy()); + if (Subtarget->is64Bit()) + IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, + IDX, MachinePointerInfo(), MVT::i32, + false, false, 0); + else + IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), + false, false, false, 0); + + SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), + getPointerTy()); + IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); + + SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX); + res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(), + false, false, false, 0); + + // Get the offset of start of .tls section + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), + GA->getOffset(), X86II::MO_SECREL); + SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA); + + // The address of the thread local variable is the add of the thread + // pointer with the offset of the variable. + return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset); + } + + llvm_unreachable("TLS not implemented for this target."); } -/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and -/// take a 2 x i32 value to shift plus a shift amount. -SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const { +/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values +/// and take a 2 x i32 value to shift plus a shift amount. +SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -7561,85 +7519,65 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const { - // This algorithm is not obvious. Here it is in C code, more or less: + // This algorithm is not obvious. 
Here is what we're trying to output:
 /*
- double uint64_to_double( uint32_t hi, uint32_t lo ) {
- static const __m128i exp = { 0x4330000045300000ULL, 0 };
- static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
-
- // Copy ints to xmm registers.
- __m128i xh = _mm_cvtsi32_si128( hi );
- __m128i xl = _mm_cvtsi32_si128( lo );
-
- // Combine into low half of a single xmm register.
- __m128i x = _mm_unpacklo_epi32( xh, xl );
- __m128d d;
- double sd;
-
- // Merge in appropriate exponents to give the integer bits the right
- // magnitude.
- x = _mm_unpacklo_epi32( x, exp );
-
- // Subtract away the biases to deal with the IEEE-754 double precision
- // implicit 1.
- d = _mm_sub_pd( (__m128d) x, bias );
-
- // All conversions up to here are exact. The correctly rounded result is
- // calculated using the current rounding mode using the following
- // horizontal add.
- d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
- _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
- // store doesn't really need to be here (except
- // maybe to zero the other double)
- return sd;
- }
 */
+ movq %rax, %xmm0
+ punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
+ subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
+ #ifdef __SSE3__
+ haddpd %xmm0, %xmm0
+ #else
+ pshufd $0x4e, %xmm0, %xmm1
+ addpd %xmm1, %xmm0
+ #endif
 */
 DebugLoc dl = Op.getDebugLoc();
 LLVMContext *Context = DAG.getContext();

 // Build some magic constants.
- SmallVector<Constant*,4> CV0;
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- Constant *C0 = ConstantVector::get(CV0);
+ const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ Constant *C0 = ConstantDataVector::get(*Context, CV0);
 SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);

 SmallVector<Constant*,2> CV1;
 CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
 CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
 Constant *C1 = ConstantVector::get(CV1);
 SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);

- SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(1)));
- SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(0)));
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ // Load the 64-bit value into an XMM register. 
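[Editor's note: the new comment above gives the sequence in assembly, while the removed comment carried the same algorithm as C intrinsics. For readers following along, here is a compilable x86-64 version of what the new lowering emits, with the constants reordered to match the new CV0/CV1 layout; the SSE2 tail is shown, and the SSE3 path would use _mm_hadd_pd instead:]

#include <emmintrin.h>
#include <stdint.h>

double uint64_to_double(uint64_t u) {
  // movq: low dword = low half of u, next dword = high half.
  __m128i x = _mm_cvtsi64_si128((long long)u);
  // punpckldq with { 0x43300000, 0x45300000, 0, 0 } builds two doubles:
  // 2^52 + lo32 and 2^84 + hi32 * 2^32 (biased, still exact).
  const __m128i exp = _mm_set_epi32(0, 0, 0x45300000, 0x43300000);
  x = _mm_unpacklo_epi32(x, exp);
  // subpd removes the biases { 2^52, 2^84 }; the two halves now hold
  // lo32 and hi32 * 2^32 exactly.
  const __m128d bias = _mm_set_pd(0x1.0p84, 0x1.0p52);
  __m128d d = _mm_sub_pd(_mm_castsi128_pd(x), bias);
  // The horizontal add is the only step that rounds.
  d = _mm_add_sd(d, _mm_unpackhi_pd(d, d));
  return _mm_cvtsd_f64(d);
}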
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, + Op.getOperand(0)); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(), false, false, false, 16); - SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); - SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2); + SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1), + CLod0); + SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(), false, false, false, 16); + SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); + SDValue Result; - // Add the halves; easiest way is to swap them into another reg first. - int ShufMask[2] = { 1, -1 }; - SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, - DAG.getUNDEF(MVT::v2f64), ShufMask); - SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add, + if (Subtarget->hasSSE3()) { + // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'. + Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); + } else { + SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub); + SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32, + S2F, 0x4E, DAG); + Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle), + Sub); + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, DAG.getIntPtrConstant(0)); } @@ -7656,8 +7594,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, Op.getOperand(0)); // Zero out the upper parts of the register. - Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(), - DAG); + Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), @@ -7709,6 +7646,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, return LowerUINT_TO_FP_i64(Op, DAG); else if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); + else if (Subtarget->is64Bit() && + SrcVT == MVT::i64 && DstVT == MVT::f32) + return SDValue(); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); @@ -7728,7 +7668,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, MachinePointerInfo(), + StackSlot, MachinePointerInfo(), false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in @@ -7776,19 +7716,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } std::pair<SDValue,SDValue> X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { +FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const { DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); - if (!IsSigned) { + if (!IsSigned && !isIntegerTypeFTOL(DstTy)) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); DstTy = MVT::i64; } assert(DstTy.getSimpleVT() <= MVT::i64 && DstTy.getSimpleVT() >= MVT::i16 && - "Unknown FP_TO_SINT to lower!"); + "Unknown FP_TO_INT to lower!"); // These are really Legal. 
if (DstTy == MVT::i32 && @@ -7799,26 +7739,29 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) return std::make_pair(SDValue(), SDValue()); - // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary - // stack slot. + // We lower FP->int64 either into FISTP64 followed by a load from a temporary + // stack slot, or into the FTOL runtime function. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - - unsigned Opc; - switch (DstTy.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); - case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; - case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; - case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; - } + if (!IsSigned && isIntegerTypeFTOL(DstTy)) + Opc = X86ISD::WIN_FTOL; + else + switch (DstTy.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); + case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; + case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; + case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; + } SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); EVT TheVT = Op.getOperand(0).getValueType(); + // FIXME This causes a redundant load/store if the SSE-class value is already + // in memory, such as if it is on the callstack. if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, @@ -7843,12 +7786,26 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOStore, MemSize, MemSize); - // Build the FP_TO_INT*_IN_MEM - SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, 3, DstTy, MMO); - - return std::make_pair(FIST, StackSlot); + if (Opc != X86ISD::WIN_FTOL) { + // Build the FP_TO_INT*_IN_MEM + SDValue Ops[] = { Chain, Value, StackSlot }; + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + Ops, 3, DstTy, MMO); + return std::make_pair(FIST, StackSlot); + } else { + SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, + DAG.getVTList(MVT::Other, MVT::Glue), + Chain, Value); + SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX, + MVT::i32, ftol.getValue(1)); + SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX, + MVT::i32, eax.getValue(2)); + SDValue Ops[] = { eax, edx }; + SDValue pair = IsReplace + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2) + : DAG.getMergeValues(Ops, 2, DL); + return std::make_pair(pair, SDValue()); + } } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, @@ -7856,27 +7813,37 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, if (Op.getValueType().isVector()) return SDValue(); - std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); + std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, + /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. if (FIST.getNode() == 0) return Op; - // Load the result. 
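[Editor's note on the WIN_FTOL path above: the _ftol2-style runtime call leaves the 64-bit result split across EDX:EAX, and the two CopyFromReg values are glued into a pair. The combine that the BUILD_PAIR expresses is simply the following, with register names matching the hunk:]

#include <cstdint>

// Assemble the 64-bit conversion result from the register pair produced by
// the ftol-style runtime call.
static uint64_t combineEdxEax(uint32_t eax, uint32_t edx) {
  return (uint64_t(edx) << 32) | eax;
}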
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, MachinePointerInfo(), - false, false, false, 0); + if (StackSlot.getNode()) + // Load the result. + return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), + FIST, StackSlot, MachinePointerInfo(), + false, false, false, 0); + else + // The node is the result. + return FIST; } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { - std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false); + std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, + /*IsSigned=*/ false, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; assert(FIST.getNode() && "Unexpected failure"); - // Load the result. - return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, MachinePointerInfo(), - false, false, false, 0); + if (StackSlot.getNode()) + // Load the result. + return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), + FIST, StackSlot, MachinePointerInfo(), + false, false, false, 0); + else + // The node is the result. + return FIST; } SDValue X86TargetLowering::LowerFABS(SDValue Op, @@ -7887,15 +7854,14 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); - SmallVector<Constant*,4> CV; + Constant *C; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); - CV.assign(2, C); + C = ConstantVector::getSplat(2, + ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); } else { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); - CV.assign(4, C); + C = ConstantVector::getSplat(4, + ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); } - Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -7913,15 +7879,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); } - SmallVector<Constant*,8> CV; - if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); - CV.assign(NumElts, C); - } else { - Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); - CV.assign(NumElts, C); - } - Constant *C = ConstantVector::get(CV); + Constant *C; + if (EltVT == MVT::f64) + C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); + else + C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); + C = ConstantVector::getSplat(NumElts, C); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -8353,9 +8316,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (isFP) { unsigned SSECC = 8; EVT EltVT = Op0.getValueType().getVectorElementType(); - assert(EltVT == MVT::f32 || EltVT == MVT::f64); + assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT; - unsigned Opc = EltVT == MVT::f32 ? 
X86ISD::CMPPS : X86ISD::CMPPD; bool Swap = false; // SSE Condition code mapping: @@ -8395,19 +8357,24 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (SSECC == 8) { if (SetCCOpcode == ISD::SETUEQ) { SDValue UNORD, EQ; - UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); + UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(3, MVT::i8)); + EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(0, MVT::i8)); return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); } else if (SetCCOpcode == ISD::SETONE) { SDValue ORD, NEQ; - ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); - NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); + ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(7, MVT::i8)); + NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(4, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); } llvm_unreachable("Illegal FP comparison"); } // Handle all other FP comparisons here. - return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); + return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(SSECC, MVT::i8)); } // Break 256-bit integer vector compare into smaller ones. @@ -8417,38 +8384,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc = 0, EQOpc = 0, GTOpc = 0; + unsigned Opc = 0; bool Swap = false, Invert = false, FlipSigns = false; - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { - default: break; - case MVT::i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; - case MVT::i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; - } - switch (SetCCOpcode) { default: break; case ISD::SETNE: Invert = true; - case ISD::SETEQ: Opc = EQOpc; break; + case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; - case ISD::SETGT: Opc = GTOpc; break; + case ISD::SETGT: Opc = X86ISD::PCMPGT; break; case ISD::SETGE: Swap = true; - case ISD::SETLE: Opc = GTOpc; Invert = true; break; + case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break; case ISD::SETULT: Swap = true; - case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break; + case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break; case ISD::SETUGE: Swap = true; - case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break; + case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } if (Swap) std::swap(Op0, Op1); // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
-  if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42orAVX())
+  if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
     return SDValue();
-  if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41orAVX())
+  if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
     return SDValue();
 
   // Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -9113,7 +9072,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
     assert(!getTargetMachine().Options.UseSoftFloat &&
            !(DAG.getMachineFunction()
              .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
-           Subtarget->hasXMM());
+           Subtarget->hasSSE1());
   }
 
   // Insert VAARG_64 node into the DAG
@@ -9159,6 +9118,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
                        MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
 }
 
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes the immediate version of the shift as
+// input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+                                   SDValue SrcOp, SDValue ShAmt,
+                                   SelectionDAG &DAG) {
+  assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+  if (isa<ConstantSDNode>(ShAmt)) {
+    switch (Opc) {
+    default: llvm_unreachable("Unknown target vector shift node");
+    case X86ISD::VSHLI:
+    case X86ISD::VSRLI:
+    case X86ISD::VSRAI:
+      return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+    }
+  }
+
+  // Change the opcode to the non-immediate version.
+  switch (Opc) {
+  default: llvm_unreachable("Unknown target vector shift node");
+  case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+  case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+  case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+  }
+
+  // Need to build a vector containing the shift amount. The amount is
+  // 32 bits, but the SSE instructions read 64 bits, so fill the upper
+  // 32 bits with zero.
+  SDValue ShOps[4];
+  ShOps[0] = ShAmt;
+  ShOps[1] = DAG.getConstant(0, MVT::i32);
+  ShOps[2] = DAG.getUNDEF(MVT::i32);
+  ShOps[3] = DAG.getUNDEF(MVT::i32);
+  ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+  ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
+
 SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -9193,7 +9189,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
     unsigned Opc = 0;
     ISD::CondCode CC = ISD::SETCC_INVALID;
     switch (IntNo) {
-    default: break;
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse2_comieq_sd: Opc = X86ISD::COMI; @@ -9265,7 +9261,201 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // XOP comparison intrinsics + case Intrinsic::x86_xop_vpcomltb: + case Intrinsic::x86_xop_vpcomltw: + case Intrinsic::x86_xop_vpcomltd: + case Intrinsic::x86_xop_vpcomltq: + case Intrinsic::x86_xop_vpcomltub: + case Intrinsic::x86_xop_vpcomltuw: + case Intrinsic::x86_xop_vpcomltud: + case Intrinsic::x86_xop_vpcomltuq: + case Intrinsic::x86_xop_vpcomleb: + case Intrinsic::x86_xop_vpcomlew: + case Intrinsic::x86_xop_vpcomled: + case Intrinsic::x86_xop_vpcomleq: + case Intrinsic::x86_xop_vpcomleub: + case Intrinsic::x86_xop_vpcomleuw: + case Intrinsic::x86_xop_vpcomleud: + case Intrinsic::x86_xop_vpcomleuq: + case Intrinsic::x86_xop_vpcomgtb: + case Intrinsic::x86_xop_vpcomgtw: + case Intrinsic::x86_xop_vpcomgtd: + case Intrinsic::x86_xop_vpcomgtq: + case Intrinsic::x86_xop_vpcomgtub: + case Intrinsic::x86_xop_vpcomgtuw: + case Intrinsic::x86_xop_vpcomgtud: + case Intrinsic::x86_xop_vpcomgtuq: + case Intrinsic::x86_xop_vpcomgeb: + case Intrinsic::x86_xop_vpcomgew: + case Intrinsic::x86_xop_vpcomged: + case Intrinsic::x86_xop_vpcomgeq: + case Intrinsic::x86_xop_vpcomgeub: + case Intrinsic::x86_xop_vpcomgeuw: + case Intrinsic::x86_xop_vpcomgeud: + case Intrinsic::x86_xop_vpcomgeuq: + case Intrinsic::x86_xop_vpcomeqb: + case Intrinsic::x86_xop_vpcomeqw: + case Intrinsic::x86_xop_vpcomeqd: + case Intrinsic::x86_xop_vpcomeqq: + case Intrinsic::x86_xop_vpcomequb: + case Intrinsic::x86_xop_vpcomequw: + case Intrinsic::x86_xop_vpcomequd: + case Intrinsic::x86_xop_vpcomequq: + case Intrinsic::x86_xop_vpcomneb: + case Intrinsic::x86_xop_vpcomnew: + case Intrinsic::x86_xop_vpcomned: + case Intrinsic::x86_xop_vpcomneq: + case Intrinsic::x86_xop_vpcomneub: + case Intrinsic::x86_xop_vpcomneuw: + case Intrinsic::x86_xop_vpcomneud: + case Intrinsic::x86_xop_vpcomneuq: + case Intrinsic::x86_xop_vpcomfalseb: + case Intrinsic::x86_xop_vpcomfalsew: + case Intrinsic::x86_xop_vpcomfalsed: + case Intrinsic::x86_xop_vpcomfalseq: + case Intrinsic::x86_xop_vpcomfalseub: + case Intrinsic::x86_xop_vpcomfalseuw: + case Intrinsic::x86_xop_vpcomfalseud: + case Intrinsic::x86_xop_vpcomfalseuq: + case Intrinsic::x86_xop_vpcomtrueb: + case Intrinsic::x86_xop_vpcomtruew: + case Intrinsic::x86_xop_vpcomtrued: + case Intrinsic::x86_xop_vpcomtrueq: + case Intrinsic::x86_xop_vpcomtrueub: + case Intrinsic::x86_xop_vpcomtrueuw: + case Intrinsic::x86_xop_vpcomtrueud: + case Intrinsic::x86_xop_vpcomtrueuq: { + unsigned CC = 0; + unsigned Opc = 0; + + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_xop_vpcomltb: + case Intrinsic::x86_xop_vpcomltw: + case Intrinsic::x86_xop_vpcomltd: + case Intrinsic::x86_xop_vpcomltq: + CC = 0; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomltub: + case Intrinsic::x86_xop_vpcomltuw: + case Intrinsic::x86_xop_vpcomltud: + case Intrinsic::x86_xop_vpcomltuq: + CC = 0; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomleb: + case Intrinsic::x86_xop_vpcomlew: + case Intrinsic::x86_xop_vpcomled: + case Intrinsic::x86_xop_vpcomleq: + CC = 1; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomleub: + case Intrinsic::x86_xop_vpcomleuw: + case Intrinsic::x86_xop_vpcomleud: + case Intrinsic::x86_xop_vpcomleuq: + CC = 1; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomgtb: + case Intrinsic::x86_xop_vpcomgtw: + case Intrinsic::x86_xop_vpcomgtd: + case Intrinsic::x86_xop_vpcomgtq: + CC = 2; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomgtub: + case Intrinsic::x86_xop_vpcomgtuw: + case Intrinsic::x86_xop_vpcomgtud: + case Intrinsic::x86_xop_vpcomgtuq: + CC = 2; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomgeb: + case Intrinsic::x86_xop_vpcomgew: + case Intrinsic::x86_xop_vpcomged: + case Intrinsic::x86_xop_vpcomgeq: + CC = 3; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomgeub: + case Intrinsic::x86_xop_vpcomgeuw: + case Intrinsic::x86_xop_vpcomgeud: + case Intrinsic::x86_xop_vpcomgeuq: + CC = 3; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomeqb: + case Intrinsic::x86_xop_vpcomeqw: + case Intrinsic::x86_xop_vpcomeqd: + case Intrinsic::x86_xop_vpcomeqq: + CC = 4; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomequb: + case Intrinsic::x86_xop_vpcomequw: + case Intrinsic::x86_xop_vpcomequd: + case Intrinsic::x86_xop_vpcomequq: + CC = 4; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomneb: + case Intrinsic::x86_xop_vpcomnew: + case Intrinsic::x86_xop_vpcomned: + case Intrinsic::x86_xop_vpcomneq: + CC = 5; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomneub: + case Intrinsic::x86_xop_vpcomneuw: + case Intrinsic::x86_xop_vpcomneud: + case Intrinsic::x86_xop_vpcomneuq: + CC = 5; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomfalseb: + case Intrinsic::x86_xop_vpcomfalsew: + case Intrinsic::x86_xop_vpcomfalsed: + case Intrinsic::x86_xop_vpcomfalseq: + CC = 6; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomfalseub: + case Intrinsic::x86_xop_vpcomfalseuw: + case Intrinsic::x86_xop_vpcomfalseud: + case Intrinsic::x86_xop_vpcomfalseuq: + CC = 6; + Opc = X86ISD::VPCOMU; + break; + case Intrinsic::x86_xop_vpcomtrueb: + case Intrinsic::x86_xop_vpcomtruew: + case Intrinsic::x86_xop_vpcomtrued: + case Intrinsic::x86_xop_vpcomtrueq: + CC = 7; + Opc = X86ISD::VPCOM; + break; + case Intrinsic::x86_xop_vpcomtrueub: + case Intrinsic::x86_xop_vpcomtrueuw: + case Intrinsic::x86_xop_vpcomtrueud: + case Intrinsic::x86_xop_vpcomtrueuq: + CC = 7; + Opc = X86ISD::VPCOMU; + break; + } + + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + return DAG.getNode(Opc, dl, Op.getValueType(), LHS, RHS, + DAG.getConstant(CC, MVT::i8)); + } + // Arithmetic intrinsics. 
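+  // Each of these lowers directly to the corresponding target node. Using a
+  // plain node here (rather than leaving an INTRINSIC_WO_CHAIN wrapper) lets
+  // the later DAG combines and isel patterns treat intrinsic and generic
+  // code identically.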
+ case Intrinsic::x86_sse2_pmulu_dq: + case Intrinsic::x86_avx2_pmulu_dq: + return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_ps_256: @@ -9278,6 +9468,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_hsub_pd_256: return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: @@ -9294,6 +9496,33 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psrav_d_256: return DAG.getNode(ISD::SRA, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: + return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: + case Intrinsic::x86_ssse3_psign_w_128: + case Intrinsic::x86_ssse3_psign_d_128: + case Intrinsic::x86_avx2_psign_b: + case Intrinsic::x86_avx2_psign_w: + case Intrinsic::x86_avx2_psign_d: + return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: + return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: + case Intrinsic::x86_avx_vperm2f128_pd_256: + case Intrinsic::x86_avx_vperm2f128_si_256: + case Intrinsic::x86_avx2_vperm2i128: + return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vpermil_ps: + case Intrinsic::x86_avx_vpermil_pd: + case Intrinsic::x86_avx_vpermil_ps_256: + case Intrinsic::x86_avx_vpermil_pd_256: + return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -9361,24 +9590,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. 
- case Intrinsic::x86_avx2_pslli_w: - case Intrinsic::x86_avx2_pslli_d: - case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_psrli_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: + // SSE/AVX shift intrinsics + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); + // Fix vector shift instructions where the last operand is a non-immediate + // i32 value. 
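+  // Only the MMX forms remain on this fallback path; the SSE2/AVX2
+  // immediate shifts are handled above through getTargetVShiftNode.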
case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -9392,103 +9650,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const return SDValue(); unsigned NewIntNo = 0; - EVT ShAmtVT = MVT::v4i32; switch (IntNo) { - case Intrinsic::x86_sse2_pslli_w: - NewIntNo = Intrinsic::x86_sse2_psll_w; - break; - case Intrinsic::x86_sse2_pslli_d: - NewIntNo = Intrinsic::x86_sse2_psll_d; - break; - case Intrinsic::x86_sse2_pslli_q: - NewIntNo = Intrinsic::x86_sse2_psll_q; - break; - case Intrinsic::x86_sse2_psrli_w: - NewIntNo = Intrinsic::x86_sse2_psrl_w; - break; - case Intrinsic::x86_sse2_psrli_d: - NewIntNo = Intrinsic::x86_sse2_psrl_d; - break; - case Intrinsic::x86_sse2_psrli_q: - NewIntNo = Intrinsic::x86_sse2_psrl_q; - break; - case Intrinsic::x86_sse2_psrai_w: - NewIntNo = Intrinsic::x86_sse2_psra_w; + case Intrinsic::x86_mmx_pslli_w: + NewIntNo = Intrinsic::x86_mmx_psll_w; break; - case Intrinsic::x86_sse2_psrai_d: - NewIntNo = Intrinsic::x86_sse2_psra_d; + case Intrinsic::x86_mmx_pslli_d: + NewIntNo = Intrinsic::x86_mmx_psll_d; break; - case Intrinsic::x86_avx2_pslli_w: - NewIntNo = Intrinsic::x86_avx2_psll_w; + case Intrinsic::x86_mmx_pslli_q: + NewIntNo = Intrinsic::x86_mmx_psll_q; break; - case Intrinsic::x86_avx2_pslli_d: - NewIntNo = Intrinsic::x86_avx2_psll_d; + case Intrinsic::x86_mmx_psrli_w: + NewIntNo = Intrinsic::x86_mmx_psrl_w; break; - case Intrinsic::x86_avx2_pslli_q: - NewIntNo = Intrinsic::x86_avx2_psll_q; + case Intrinsic::x86_mmx_psrli_d: + NewIntNo = Intrinsic::x86_mmx_psrl_d; break; - case Intrinsic::x86_avx2_psrli_w: - NewIntNo = Intrinsic::x86_avx2_psrl_w; + case Intrinsic::x86_mmx_psrli_q: + NewIntNo = Intrinsic::x86_mmx_psrl_q; break; - case Intrinsic::x86_avx2_psrli_d: - NewIntNo = Intrinsic::x86_avx2_psrl_d; + case Intrinsic::x86_mmx_psrai_w: + NewIntNo = Intrinsic::x86_mmx_psra_w; break; - case Intrinsic::x86_avx2_psrli_q: - NewIntNo = Intrinsic::x86_avx2_psrl_q; - break; - case Intrinsic::x86_avx2_psrai_w: - NewIntNo = Intrinsic::x86_avx2_psra_w; - break; - case Intrinsic::x86_avx2_psrai_d: - NewIntNo = Intrinsic::x86_avx2_psra_d; - break; - default: { - ShAmtVT = MVT::v2i32; - switch (IntNo) { - case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } + case Intrinsic::x86_mmx_psrai_d: + NewIntNo = Intrinsic::x86_mmx_psra_d; break; - } + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. 
- SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - if (ShAmtVT == MVT::v4i32) { - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); - } else { - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, + DAG.getConstant(0, MVT::i32)); // FIXME this must be lowered to get rid of the invalid type. - } EVT VT = Op.getValueType(); ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); @@ -9828,7 +10023,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { return Op; } -SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -9836,26 +10032,41 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { Op = Op.getOperand(0); if (VT == MVT::i8) { + // Zero extend to i32 since there is not an i8 bsr. OpVT = MVT::i32; Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); } - // Issue a bsf (scan bits forward) which also sets EFLAGS. + // Issue a bsr (scan bits in reverse). SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); + Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); + + // And xor with NumBits-1. + Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); + + if (VT == MVT::i8) + Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); + return Op; +} + +SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + unsigned NumBits = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + Op = Op.getOperand(0); + + // Issue a bsf (scan bits forward) which also sets EFLAGS. + SDVTList VTs = DAG.getVTList(VT, MVT::i32); Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op); // If src is zero (i.e. bsf sets ZF), returns NumBits. 
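  // BSF leaves the destination undefined for a zero source but sets ZF, so
  // the CMOV below keys on X86::COND_E (consuming the EFLAGS value glued to
  // the BSF node) to substitute NumBits, the value CTTZ defines for a zero
  // input.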
SDValue Ops[] = { Op, - DAG.getConstant(NumBits, OpVT), + DAG.getConstant(NumBits, VT), DAG.getConstant(X86::COND_E, MVT::i8), Op.getValue(1) }; - Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops)); - - if (VT == MVT::i8) - Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); - return Op; + return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops)); } // Lower256IntArith - Break a 256-bit integer operation into two new 128-bit @@ -9910,86 +10121,46 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && + "Only know how to lower V2I64/V4I64 multiply"); + DebugLoc dl = Op.getDebugLoc(); + // Ahi = psrlqi(a, 32); + // Bhi = psrlqi(b, 32); + // + // AloBlo = pmuludq(a, b); + // AloBhi = pmuludq(a, Bhi); + // AhiBlo = pmuludq(Ahi, b); + + // AloBhi = psllqi(AloBhi, 32); + // AhiBlo = psllqi(AhiBlo, 32); + // return AloBlo + AloBhi + AhiBlo; + SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); - if (VT == MVT::v4i64) { - assert(Subtarget->hasAVX2() && "Lowering v4i64 multiply requires AVX2"); + SDValue ShAmt = DAG.getConstant(32, MVT::i32); - // ulong2 Ahi = __builtin_ia32_psrlqi256( a, 32); - // ulong2 Bhi = __builtin_ia32_psrlqi256( b, 32); - // ulong2 AloBlo = __builtin_ia32_pmuludq256( a, b ); - // ulong2 AloBhi = __builtin_ia32_pmuludq256( a, Bhi ); - // ulong2 AhiBlo = __builtin_ia32_pmuludq256( Ahi, b ); - // - // AloBhi = __builtin_ia32_psllqi256( AloBhi, 32 ); - // AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 ); - // return AloBlo + AloBhi + AhiBlo; - - SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - A, DAG.getConstant(32, MVT::i32)); - SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - B, DAG.getConstant(32, MVT::i32)); - SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), - A, B); - SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), - A, Bhi); - SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), - Ahi, B); - AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - AloBhi, DAG.getConstant(32, MVT::i32)); - AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - AhiBlo, DAG.getConstant(32, MVT::i32)); - SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); - Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); - return Res; - } + SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt); + SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt); - assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); + // Bit cast to 32-bit vectors for MULUDQ + EVT MulVT = (VT == MVT::v2i64) ? 
MVT::v4i32 : MVT::v8i32; + A = DAG.getNode(ISD::BITCAST, dl, MulVT, A); + B = DAG.getNode(ISD::BITCAST, dl, MulVT, B); + Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi); + Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi); - // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32); - // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32); - // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b ); - // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi ); - // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b ); - // - // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 ); - // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 ); - // return AloBlo + AloBhi + AhiBlo; + SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); + SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); + SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B); + + AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt); + AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt); - SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - A, DAG.getConstant(32, MVT::i32)); - SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - B, DAG.getConstant(32, MVT::i32)); - SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), - A, B); - SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), - A, Bhi); - SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32), - Ahi, B); - AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - AloBhi, DAG.getConstant(32, MVT::i32)); - AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - AhiBlo, DAG.getConstant(32, MVT::i32)); SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); - Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); - return Res; + return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { @@ -10000,7 +10171,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { SDValue Amt = Op.getOperand(1); LLVMContext *Context = DAG.getContext(); - if (!Subtarget->hasXMMInt()) + if (!Subtarget->hasSSE2()) return SDValue(); // Optimize shl/srl/sra with constant shift amount. @@ -10009,119 +10180,93 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) { uint64_t ShiftAmt = C->getZExtValue(); - if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) { - // Make a large shift. - SDValue SHL = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - // Zero out the rightmost bits. 
- SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt), - MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, SHL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); - } - - if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) { - // Make a large shift. - SDValue SRL = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - // Zero out the leftmost bits. - SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, - MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, SRL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasAVX2() && + (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) { + if (Op.getOpcode() == ISD::SHL) + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + if (Op.getOpcode() == ISD::SRL) + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64) + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); } - if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) { - if (ShiftAmt == 7) { - // R s>> 7 === R s< 0 - SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); - return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R); + if (VT == MVT::v16i8) { + if (Op.getOpcode() == ISD::SHL) { + // Make a large shift. + SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); + // Zero out the rightmost bits. 
+ SmallVector<SDValue, 16> V(16, + DAG.getConstant(uint8_t(-1U << ShiftAmt), + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SHL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + } + if (Op.getOpcode() == ISD::SRL) { + // Make a large shift. + SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL); + // Zero out the leftmost bits. + SmallVector<SDValue, 16> V(16, + DAG.getConstant(uint8_t(-1U) >> ShiftAmt, + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SRL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); } + if (Op.getOpcode() == ISD::SRA) { + if (ShiftAmt == 7) { + // R s>> 7 === R s< 0 + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); + } - // R s>> a === ((R u>> a) ^ m) - m - SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); - SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt, - MVT::i8)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16); - Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); - Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); - return Res; + // R s>> a === ((R u>> a) ^ m) - m + SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt, + MVT::i8)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16); + Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); + Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); + return Res; + } } if (Subtarget->hasAVX2() && VT == MVT::v32i8) { if (Op.getOpcode() == ISD::SHL) { // Make a large shift. - SDValue SHL = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); + SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); // Zero out the rightmost bits. - SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt), - MVT::i8)); + SmallVector<SDValue, 32> V(32, + DAG.getConstant(uint8_t(-1U << ShiftAmt), + MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. - SDValue SRL = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); + SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL); // Zero out the leftmost bits. - SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, - MVT::i8)); + SmallVector<SDValue, 32> V(32, + DAG.getConstant(uint8_t(-1U) >> ShiftAmt, + MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); } if (Op.getOpcode() == ISD::SRA) { if (ShiftAmt == 7) { // R s>> 7 === R s< 0 - SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); - return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R); + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); } // R s>> a === ((R u>> a) ^ m) - m @@ -10139,14 +10284,11 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // Lower SHL with variable shift amount. 
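  // For v4i32 the variable shift below uses the float-exponent trick:
  // shifting the amount left by 23 moves it into the IEEE-754 exponent
  // field, and adding 0x3f800000 (the bit pattern of 1.0f) produces the
  // single-precision value 2^amt in each lane; converting back to integer
  // yields 1 << amt, so the variable shift becomes a vector multiply.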
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { - Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), - Op.getOperand(1), DAG.getConstant(23, MVT::i32)); + Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1), + DAG.getConstant(23, MVT::i32)); - ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); - - std::vector<Constant*> CV(4, CI); - Constant *C = ConstantVector::get(CV); + const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U}; + Constant *C = ConstantDataVector::get(*Context, CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -10158,55 +10300,54 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MUL, dl, VT, Op, R); } if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) { + assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); + // a = a << 5; - Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), - Op.getOperand(1), DAG.getConstant(5, MVT::i32)); + Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1), + DAG.getConstant(5, MVT::i32)); + Op = DAG.getNode(ISD::BITCAST, dl, VT, Op); - ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15)); - ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63)); + // Turn 'a' into a mask suitable for VSELECT + SDValue VSelM = DAG.getConstant(0x80, VT); + SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM); + + SDValue CM1 = DAG.getConstant(0x0f, VT); + SDValue CM2 = DAG.getConstant(0x3f, VT); + + // r = VSELECT(r, psllw(r & (char16)15, 4), a); + SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1); + M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M, + DAG.getConstant(4, MVT::i32), DAG); + M = DAG.getNode(ISD::BITCAST, dl, VT, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); - std::vector<Constant*> CVM1(16, CM1); - std::vector<Constant*> CVM2(16, CM2); - Constant *C = ConstantVector::get(CVM1); - SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - - // r = pblendv(r, psllw(r & (char16)15, 4), a); - M = DAG.getNode(ISD::AND, dl, VT, R, M); - M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, - DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM); + + // r = VSELECT(r, psllw(r & (char16)63, 2), a); + M = DAG.getNode(ISD::AND, dl, VT, R, CM2); + M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M, + DAG.getConstant(2, MVT::i32), DAG); + M = DAG.getNode(ISD::BITCAST, dl, VT, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); - C = ConstantVector::get(CVM2); - CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); - M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - - // r = pblendv(r, psllw(r & (char16)63, 2), a); - M = DAG.getNode(ISD::AND, dl, VT, R, M); - M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - 
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, - DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); + OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); + OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM); - // return pblendv(r, r+r, a); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, + // return VSELECT(r, r+r, a); + R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, DAG.getNode(ISD::ADD, dl, VT, R, R), R); return R; } // Decompose 256-bit shifts into smaller 128-bit shifts. if (VT.getSizeInBits() == 256) { - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); @@ -10221,9 +10362,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // Constant shift amount SmallVector<SDValue, 4> Amt1Csts; SmallVector<SDValue, 4> Amt2Csts; - for (int i = 0; i < NumElems/2; ++i) + for (unsigned i = 0; i != NumElems/2; ++i) Amt1Csts.push_back(Amt->getOperand(i)); - for (int i = NumElems/2; i < NumElems; ++i) + for (unsigned i = NumElems/2; i != NumElems; ++i) Amt2Csts.push_back(Amt->getOperand(i)); Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, @@ -10323,77 +10464,58 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } -SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ +SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); EVT VT = Op.getValueType(); - if (Subtarget->hasXMMInt() && VT.isVector()) { - unsigned BitsDiff = VT.getScalarType().getSizeInBits() - - ExtraVT.getScalarType().getSizeInBits(); - SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); + if (!Subtarget->hasSSE2() || !VT.isVector()) + return SDValue(); - unsigned SHLIntrinsicsID = 0; - unsigned SRAIntrinsicsID = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32); + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i32: + case MVT::v16i16: + if (!Subtarget->hasAVX()) return SDValue(); - case MVT::v4i32: - SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; - SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; - break; - case MVT::v8i16: - SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w; - SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w; - break; - case MVT::v8i32: - case MVT::v16i16: - if (!Subtarget->hasAVX()) - return SDValue(); - if (!Subtarget->hasAVX2()) { - // needs to be split - int NumElems = VT.getVectorNumElements(); - SDValue Idx0 = DAG.getConstant(0, MVT::i32); - SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); - - // Extract the LHS vectors - SDValue LHS = Op.getOperand(0); - SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); - SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); - - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); - - EVT ExtraEltVT = ExtraVT.getVectorElementType(); - int ExtraNumElems = ExtraVT.getVectorNumElements(); - ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT, - ExtraNumElems/2); - SDValue Extra = DAG.getValueType(ExtraVT); - - LHS1 
= DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra); - LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);; - } - if (VT == MVT::v8i32) { - SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d; - SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d; - } else { - SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w; - SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w; - } + if (!Subtarget->hasAVX2()) { + // needs to be split + int NumElems = VT.getVectorNumElements(); + SDValue Idx0 = DAG.getConstant(0, MVT::i32); + SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + + // Extract the LHS vectors + SDValue LHS = Op.getOperand(0); + SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + EVT ExtraEltVT = ExtraVT.getVectorElementType(); + int ExtraNumElems = ExtraVT.getVectorNumElements(); + ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT, + ExtraNumElems/2); + SDValue Extra = DAG.getValueType(ExtraVT); + + LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra); + LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);; + } + // fall through + case MVT::v4i32: + case MVT::v8i16: { + SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, + Op.getOperand(0), ShAmt, DAG); + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } - - SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(SHLIntrinsicsID, MVT::i32), - Op.getOperand(0), ShAmt); - - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(SRAIntrinsicsID, MVT::i32), - Tmp1, ShAmt); } - - return SDValue(); } @@ -10402,7 +10524,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) { + if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { SDValue Chain = Op.getOperand(0); SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ops[] = { @@ -10456,7 +10578,7 @@ SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op, // Use mfence if we have SSE2 or we're on x86-64 (even if we asked for // no-sse2). There isn't any reason to disable it if the target processor // supports it. 
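  // Without mfence, any locked read-modify-write instruction (for example
  // `lock orl $0, (%esp)`) also acts as a full barrier on x86.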
- if (Subtarget->hasXMMInt() || Subtarget->is64Bit()) + if (Subtarget->hasSSE2() || Subtarget->is64Bit()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); SDValue Chain = Op.getOperand(0); @@ -10487,8 +10609,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { unsigned Reg = 0; unsigned size = 0; switch(T.getSimpleVT().SimpleTy) { - default: - assert(false && "Invalid value type!"); + default: llvm_unreachable("Invalid value type!"); case MVT::i8: Reg = X86::AL; size = 1; break; case MVT::i16: Reg = X86::AX; size = 2; break; case MVT::i32: Reg = X86::EAX; size = 4; break; @@ -10536,7 +10657,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); - assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() && + assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && @@ -10606,7 +10727,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { - default: assert(0 && "Invalid code"); + default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = X86ISD::ADD; break; case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; case ISD::SUBC: Opc = X86ISD::SUB; break; @@ -10673,6 +10794,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); + case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SRA: @@ -10747,8 +10869,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: - assert(false && "Do not know how to custom type legalize this operation!"); - return; + llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::SIGN_EXTEND_INREG: case ISD::ADDC: case ISD::ADDE: @@ -10756,16 +10877,25 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SUBE: // We don't want to expand or promote these. return; - case ISD::FP_TO_SINT: { + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + + if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType())) + return; + std::pair<SDValue,SDValue> Vals = - FP_TO_INTHelper(SDValue(N, 0), DAG, true); + FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, - MachinePointerInfo(), - false, false, false, 0)); + if (StackSlot.getNode() != 0) + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, + MachinePointerInfo(), + false, false, false, 0)); + else + Results.push_back(FIST); } return; } @@ -10924,18 +11054,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; + case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; case X86ISD::VSRL: return "X86ISD::VSRL"; - case X86ISD::CMPPD: return "X86ISD::CMPPD"; - case X86ISD::CMPPS: return "X86ISD::CMPPS"; - case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB"; - case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW"; - case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD"; - case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ"; - case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB"; - case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW"; - case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD"; - case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; + case X86ISD::VSRA: return "X86ISD::VSRA"; + case X86ISD::VSHLI: return "X86ISD::VSHLI"; + case X86ISD::VSRLI: return "X86ISD::VSRLI"; + case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::CMPP: return "X86ISD::CMPP"; + case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; + case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; case X86ISD::ADC: return "X86ISD::ADC"; @@ -10957,22 +11086,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PALIGN: return "X86ISD::PALIGN"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; - case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD"; case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW"; - case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD"; - case X86ISD::SHUFPS: return "X86ISD::SHUFPS"; - case X86ISD::SHUFPD: return "X86ISD::SHUFPD"; + case X86ISD::SHUFP: return "X86ISD::SHUFP"; case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS"; case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD"; case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS"; - case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD"; case X86ISD::MOVLPS: return "X86ISD::MOVLPS"; case X86ISD::MOVLPD: return "X86ISD::MOVLPD"; case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP"; case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP"; case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP"; - case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD"; - case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; @@ -10980,11 +11103,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; + case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; + case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; 
} } @@ -11093,15 +11218,15 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, return (VT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, VT) || - isSHUFPMask(M, VT) || + isSHUFPMask(M, VT, Subtarget->hasAVX()) || isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()) || + isPALIGNRMask(M, VT, Subtarget) || isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || - isUNPCKL_v_undef_Mask(M, VT) || - isUNPCKH_v_undef_Mask(M, VT)); + isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) || + isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2())); } bool @@ -11114,8 +11239,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, if (NumElts == 4 && VT.getSizeInBits() == 128) { return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || - isSHUFPMask(Mask, VT) || - isSHUFPMask(Mask, VT, /* Commuted */ true)); + isSHUFPMask(Mask, VT, Subtarget->hasAVX()) || + isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true)); } return false; } @@ -11134,7 +11259,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned CXchgOpc, unsigned notOpc, unsigned EAXreg, - TargetRegisterClass *RC, + const TargetRegisterClass *RC, bool invSrc) const { // For the atomic bitwise operator, we generate // thisMBB: @@ -11506,7 +11631,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert(Subtarget->hasSSE42orAVX() && + assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); @@ -11911,6 +12036,42 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( return EndMBB; } +// The EFLAGS operand of SelectItr might be missing a kill marker +// because there were multiple uses of EFLAGS, and ISel didn't know +// which to mark. Figure out whether SelectItr should have had a +// kill marker, and set it if it should. Returns the correct kill +// marker value. +static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, + MachineBasicBlock* BB, + const TargetRegisterInfo* TRI) { + // Scan forward through BB for a use/def of EFLAGS. + MachineBasicBlock::iterator miI(llvm::next(SelectItr)); + for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { + const MachineInstr& mi = *miI; + if (mi.readsRegister(X86::EFLAGS)) + return false; + if (mi.definesRegister(X86::EFLAGS)) + break; // Should have kill-flag - update below. + } + + // If we hit the end of the block, check whether EFLAGS is live into a + // successor. + if (miI == BB->end()) { + for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), + sEnd = BB->succ_end(); + sItr != sEnd; ++sItr) { + MachineBasicBlock* succ = *sItr; + if (succ->isLiveIn(X86::EFLAGS)) + return false; + } + } + + // We found a def, or hit the end of the basic block and EFLAGS wasn't live + // out. SelectMI should have a kill flag on EFLAGS. + SelectItr->addRegisterKilled(X86::EFLAGS, TRI); + return true; +} + MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -11940,7 +12101,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. 
- if (!MI->killsRegister(X86::EFLAGS)) { + const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); + if (!MI->killsRegister(X86::EFLAGS) && + !checkAndUpdateEFLAGSKill(MI, BB, TRI)) { copy0MBB->addLiveIn(X86::EFLAGS); sinkMBB->addLiveIn(X86::EFLAGS); } @@ -12038,7 +12201,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) .addReg(tmpSPVReg).addReg(sizeVReg); BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr)) - .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg) + .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB); @@ -12051,17 +12214,23 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. + const uint32_t *RegMask = + getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C); if (Is64Bit) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI); + .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI) + .addRegMask(RegMask) + .addReg(X86::RAX, RegState::ImplicitDefine); } else { BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) .addImm(12); BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space"); + .addExternalSymbol("__morestack_allocate_stack_space") + .addRegMask(RegMask) + .addReg(X86::EAX, RegState::ImplicitDefine); } if (!Is64Bit) @@ -12159,6 +12328,11 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?"); assert(MI->getOperand(3).isGlobal() && "This should be a global"); + // Get a register mask for the lowered call. + // FIXME: The 32-bit calls have non-standard calling conventions. Use a + // proper register mask. + const uint32_t *RegMask = + getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C); if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) @@ -12169,6 +12343,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); + MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) @@ -12179,6 +12354,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); + MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } else { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) @@ -12189,6 +12365,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); + MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); } MI->eraseFromParent(); // The pseudo instruction is gone now. 
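The two hunks above annotate the lowered Darwin TLS call with a call-preserved register mask and an implicit return-register def. For orientation, a minimal source-level sketch of what reaches EmitLoweredTLSCall (names are hypothetical, not from the patch): on Darwin, reading a __thread variable compiles to an indirect call through the thread-local variable descriptor, which is exactly the CALL64m/CALL32m built above.

__thread int tls_counter;   // hypothetical thread-local variable

int bump_tls_counter(void) {
  return ++tls_counter;     // lowers to the reg-mask-annotated TLS call
}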
@@ -12199,30 +12376,14 @@ MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: assert(0 && "Unexpected instr type to insert"); + default: llvm_unreachable("Unexpected instr type to insert"); case X86::TAILJMPd64: case X86::TAILJMPr64: case X86::TAILJMPm64: - assert(0 && "TAILJMP64 would not be touched here."); + llvm_unreachable("TAILJMP64 would not be touched here."); case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: - // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset. - // On AMD64, additional defs should be added before register allocation. - if (!Subtarget->isTargetWin64()) { - MI->addRegisterDefined(X86::RSI); - MI->addRegisterDefined(X86::RDI); - MI->addRegisterDefined(X86::XMM6); - MI->addRegisterDefined(X86::XMM7); - MI->addRegisterDefined(X86::XMM8); - MI->addRegisterDefined(X86::XMM9); - MI->addRegisterDefined(X86::XMM10); - MI->addRegisterDefined(X86::XMM11); - MI->addRegisterDefined(X86::XMM12); - MI->addRegisterDefined(X86::XMM13); - MI->addRegisterDefined(X86::XMM14); - MI->addRegisterDefined(X86::XMM15); - } return BB; case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); @@ -12572,15 +12733,18 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case Intrinsic::x86_sse2_movmsk_pd: case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_mmx_pmovmskb: - case Intrinsic::x86_sse2_pmovmskb_128: { + case Intrinsic::x86_sse2_pmovmskb_128: + case Intrinsic::x86_avx2_pmovmskb: { // High bits of movmskp{s|d}, pmovmskb are known zero. switch (IntId) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break; case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break; case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break; case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break; case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break; case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; + case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break; } KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), Mask.getBitWidth() - NumLoBits); @@ -12651,7 +12815,8 @@ static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) { /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors. static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget* Subtarget) { DebugLoc dl = N->getDebugLoc(); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); SDValue V1 = SVOp->getOperand(0); @@ -12687,9 +12852,23 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) return SDValue(); + // If V1 is coming from a vector load then just fold to a VZEXT_LOAD. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) { + SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); + SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, + Ld->getMemoryVT(), + Ld->getPointerInfo(), + Ld->getAlignment(), + false/*isVolatile*/, true/*ReadMem*/, + false/*WriteMem*/); + return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); + } + // Emit a zeroed vector and insert the desired subvector on its // first half. 
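  // (Illustrative sketch, not from the patch: a source-level pattern that
  // can produce this DAG shape, written with clang's vector extensions.
  //
  //   typedef long long v2i64 __attribute__((vector_size(16)));
  //   typedef long long v4i64 __attribute__((vector_size(32)));
  //
  //   v4i64 widen_zero_upper(const v2i64 *p) {
  //     v2i64 lo = *p;                        // 128-bit load feeding V1
  //     v4i64 wide = { lo[0], lo[1], 0, 0 };  // keep low half, zero high half
  //     return wide;                          // folds to one VZEXT_LOAD above
  //   }
  //
  // When V1's operand is not a load, the zero vector + insert emitted below
  // is used instead.)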
-    SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+    SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
     SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
                          DAG.getConstant(0, MVT::i32), DAG, dl);
     return DCI.CombineTo(N, InsV);
 }
@@ -12734,7 +12913,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
   if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
       N->getOpcode() == ISD::VECTOR_SHUFFLE)
-    return PerformShuffleCombine256(N, DAG, DCI);
+    return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
 
   // Only handle 128 wide vector from here on.
   if (VT.getSizeInBits() != 128)
@@ -12750,6 +12929,82 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
 }
 
+
+/// PerformTruncateCombine - Converts a truncate operation into
+/// a sequence of vector shuffle operations.
+/// This is possible when we truncate a 256-bit vector to a 128-bit vector.
+
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+                                                  DAGCombinerInfo &DCI) const {
+  if (!DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  if (!Subtarget->hasAVX()) return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  EVT OpVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
+
+    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+                               DAG.getIntPtrConstant(0));
+
+    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+                               DAG.getIntPtrConstant(2));
+
+    OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+    OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+    // PSHUFD
+    int ShufMask1[] = {0, 2, 0, 0};
+
+    OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT),
+                                ShufMask1);
+    OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT),
+                                ShufMask1);
+
+    // MOVLHPS
+    int ShufMask2[] = {0, 1, 4, 5};
+
+    return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
+  }
+  if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
+
+    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+                               DAG.getIntPtrConstant(0));
+
+    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+                               DAG.getIntPtrConstant(4));
+
+    OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
+    OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
+
+    // PSHUFB
+    int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+                       -1, -1, -1, -1, -1, -1, -1, -1};
+
+    OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo,
+                                DAG.getUNDEF(MVT::v16i8),
+                                ShufMask1);
+    OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi,
+                                DAG.getUNDEF(MVT::v16i8),
+                                ShufMask1);
+
+    OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+    OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+    // MOVLHPS
+    int ShufMask2[] = {0, 1, 4, 5};
+
+    SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
+  }
+
+  return SDValue();
+}
+
 /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
 /// generation and convert it from being a bunch of shuffles and extracts
 /// to a simple store and scalar loads to extract the elements.
@@ -12836,6 +13091,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
 /// nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget *Subtarget) {
   DebugLoc DL = N->getDebugLoc();
   SDValue Cond = N->getOperand(0);
@@ -12850,7 +13106,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
   // ignored in unsafe-math mode).
   if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
       VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
-      (Subtarget->hasXMMInt() ||
+      (Subtarget->hasSSE2() ||
        (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -13081,6 +13337,57 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
+
+  // Canonicalize max and min:
+  // (x > y) ? x : y -> (x >= y) ? x : y
+  // (x < y) ? x : y -> (x <= y) ? x : y
+  // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
+  // the need for an extra compare against zero. e.g.
+  // (x - y) > 0 ? (x - y) : 0 -> (x - y) >= 0 ? (x - y) : 0
+  //   subl   %esi, %edi
+  //   testl  %edi, %edi
+  //   movl   $0, %eax
+  //   cmovgl %edi, %eax
+  // =>
+  //   xorl   %eax, %eax
+  //   subl   %esi, %edi
+  //   cmovsl %eax, %edi
+  if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
+      DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+      DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+    switch (CC) {
+    default: break;
+    case ISD::SETLT:
+    case ISD::SETGT: {
+      ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
+      Cond = DAG.getSetCC(Cond.getDebugLoc(), Cond.getValueType(),
+                          Cond.getOperand(0), Cond.getOperand(1), NewCC);
+      return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
+    }
+    }
+  }
+
+  // If we know that this node is legal then we know that it is going to be
+  // matched by one of the SSE/AVX BLEND instructions. These instructions only
+  // depend on the highest bit in each word. Try to use SimplifyDemandedBits
+  // to simplify previous instructions.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
+      !DCI.isBeforeLegalize() &&
+      TLI.isOperationLegal(ISD::VSELECT, VT)) {
+    unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+    assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+    APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+
+    APInt KnownZero, KnownOne;
+    TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+                                          DCI.isBeforeLegalizeOps());
+    if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+        TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+      DCI.CommitTargetLoweringOpt(TLO);
+  }
+
   return SDValue();
 }
@@ -13314,6 +13621,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
 /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
 /// when possible.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
   EVT VT = N->getValueType(0);
   if (N->getOpcode() == ISD::SHL) {
@@ -13325,7 +13633,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
   // all elements are shifted by the same amount.  We can't do this in legalize
   // because a constant vector is typically transformed to a constant pool
   // so we have no knowledge of the shift amount.
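  // (Illustrative sketch, not from the patch: a splat-amount shift that this
  // combine rewrites, using clang's vector extensions.
  //
  //   typedef int v4i32 __attribute__((vector_size(16)));
  //
  //   v4i32 shift_all_lanes(v4i32 v, int n) {
  //     return v << n;   // one vector shift with the amount in a register,
  //   }                  // instead of four scalar shifts
  //
  // The code below only fires when every build_vector lane is the same
  // BaseShAmt.)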
- if (!Subtarget->hasXMMInt()) + if (!Subtarget->hasSSE2()) return SDValue(); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && @@ -13346,6 +13654,11 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, BaseShAmt = Arg; break; } + // Handle the case where the build_vector is all undef + // FIXME: Should DAG allow this? + if (i == NumElts) + return SDValue(); + for (; i != NumElts; ++i) { SDValue Arg = ShAmtOp.getOperand(i); if (Arg.getOpcode() == ISD::UNDEF) continue; @@ -13372,9 +13685,16 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, BaseShAmt = InVec.getOperand(1); } } - if (BaseShAmt.getNode() == 0) + if (BaseShAmt.getNode() == 0) { + // Don't create instructions with illegal types after legalize + // types has run. + if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && + !DCI.isBeforeLegalize()) + return SDValue(); + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, DAG.getIntPtrConstant(0)); + } } else return SDValue(); @@ -13389,79 +13709,38 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift opcode!"); - break; case ISD::SHL: - if (VT == MVT::v2i64) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v4i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v4i64) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v16i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), - ValOp, BaseShAmt); - break; + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); + } case ISD::SRA: - if (VT == MVT::v4i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v16i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), - ValOp, BaseShAmt); - break; + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); + } case ISD::SRL: - if (VT == MVT::v2i64) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v4i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - 
DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v4i64) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v8i32) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), - ValOp, BaseShAmt); - if (VT == MVT::v16i16) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), - ValOp, BaseShAmt); - break; + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); + } } - return SDValue(); } @@ -13475,7 +13754,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but // we're requiring SSE2 for both. - if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { + if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CMP0 = N0->getOperand(1); @@ -13666,14 +13945,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // look for psign/blend if (VT == MVT::v2i64 || VT == MVT::v4i64) { - if (!Subtarget->hasSSSE3orAVX() || + if (!Subtarget->hasSSSE3() || (VT == MVT::v4i64 && !Subtarget->hasAVX2())) return SDValue(); // Canonicalize pandn to RHS if (N0.getOpcode() == X86ISD::ANDNP) std::swap(N0, N1); - // or (and (m, x), (pandn m, y)) + // or (and (m, y), (pandn m, x)) if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { SDValue Mask = N1.getOperand(0); SDValue X = N1.getOperand(1); @@ -13697,24 +13976,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, Mask = Mask.getOperand(0); EVT MaskVT = Mask.getValueType(); - // Validate that the Mask operand is a vector sra node. The sra node - // will be an intrinsic. - if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return SDValue(); - + // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { - case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - break; - default: return SDValue(); - } + if (Mask.getOpcode() != X86ISD::VSRAI) + return SDValue(); // Check that the SRA is all signbits. 
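  // (Illustrative sketch, not from the patch: the conditional-negate idiom
  // this or/and/andn match recognizes, using clang's vector extensions.
  //
  //   typedef int v4i32 __attribute__((vector_size(16)));
  //
  //   v4i32 cond_negate(v4i32 x, v4i32 y) {
  //     v4i32 m = y >> 31;           // VSRAI by EltBits-1: all sign bits
  //     return (~m & x) | (m & -x);  // x where y >= 0, 0-x where y < 0
  //   }
  //
  // The check below verifies the mask really is an all-sign-bits shift.)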
- SDValue SraC = Mask.getOperand(2); + SDValue SraC = Mask.getOperand(1); unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); if ((SraAmt + 1) != EltBits) @@ -13729,14 +13998,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, Y = Y.getOperand(0); if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && - X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() && - (EltBits == 8 || EltBits == 16 || EltBits == 32)) { - SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, - Mask.getOperand(1)); - return DAG.getNode(ISD::BITCAST, DL, VT, Sign); + X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { + assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && + "Unsupported VT for PSIGN"); + Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0)); + return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } // PBLENDVB only available on SSE 4.1 - if (!Subtarget->hasSSE41orAVX()) + if (!Subtarget->hasSSE41()) return SDValue(); EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; @@ -13807,6 +14076,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -13818,6 +14088,8 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); + assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions"); + // Create BLSMSK instructions by finding X ^ (X-1) SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -13849,7 +14121,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // shuffle. We need SSE4 for the shuffles. // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. - if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) { + if (RegVT.isVector() && RegVT.isInteger() && + Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) { assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -13896,7 +14169,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. - SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector); + SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, + ScalarInVector); unsigned SizeRatio = RegSz/MemSz; // Redistribute the loaded elements into the different locations. 
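The load-combine logic above turns a vector extending load into a single wide load followed by a shuffle that redistributes the elements. A rough source-level sketch of an input it targets (hypothetical function, clang's vector extensions; SSE4.1 is assumed, as the code requires):

typedef short v4i16 __attribute__((vector_size(8)));
typedef int   v4i32 __attribute__((vector_size(16)));

v4i32 load_and_widen(const v4i16 *p) {
  v4i16 n = *p;                          // narrow vector load (the EXTLOAD)
  v4i32 w = { n[0], n[1], n[2], n[3] };  // each i16 lane widened to i32
  return w;                              // one wide load + shuffle, not four
}                                        // scalar loads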
@@ -14039,7 +14313,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps - && Subtarget->hasXMMInt(); + && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa<LoadSDNode>(St->getValue()) && @@ -14057,7 +14331,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, Ld = cast<LoadSDNode>(St->getChain()); else if (St->getValue().hasOneUse() && ChainVal->getOpcode() == ISD::TokenFactor) { - for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) { if (ChainVal->getOperand(i).getNode() == LdVal) { TokenFactorIndex = i; Ld = cast<LoadSDNode>(St->getValue()); @@ -14196,7 +14470,8 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { A = LHS.getOperand(0); if (LHS.getOperand(1).getOpcode() != ISD::UNDEF) B = LHS.getOperand(1); - cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(); + std::copy(Mask.begin(), Mask.end(), LMask.begin()); } else { if (LHS.getOpcode() != ISD::UNDEF) A = LHS; @@ -14213,7 +14488,8 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { C = RHS.getOperand(0); if (RHS.getOperand(1).getOpcode() != ISD::UNDEF) D = RHS.getOperand(1); - cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(); + std::copy(Mask.begin(), Mask.end(), RMask.begin()); } else { if (RHS.getOpcode() != ISD::UNDEF) C = RHS; @@ -14270,7 +14546,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); @@ -14285,7 +14561,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. 
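  // (Illustrative sketch, not from the patch: the lane pairing that
  // isHorizontalBinOp accepts, using clang's vector extensions.
  //
  //   typedef float v4f32 __attribute__((vector_size(16)));
  //
  //   v4f32 hadd(v4f32 a, v4f32 b) {
  //     v4f32 evens = { a[0], a[2], b[0], b[2] };  // shuffle of even lanes
  //     v4f32 odds  = { a[1], a[3], b[1], b[3] };  // shuffle of odd lanes
  //     return evens + odds;  // (a0+a1, a2+a3, b0+b1, b2+b3) == HADDPS a, b
  //   }
  //
  // The FSUB case below is the same shape with '-' and X86ISD::FHSUB.)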
-  if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+  if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
        (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
       isHorizontalBinOp(LHS, RHS, false))
     return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
@@ -14352,7 +14628,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const X86Subtarget *Subtarget) {
+  if (!DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  if (!Subtarget->hasAVX())
+    return SDValue();
+
+  // Optimize vectors in AVX mode:
+  // sign extend v8i16 to v8i32 and v4i32 to v4i64.
+  //
+  // Divide the input vector into two parts; for v4i32 the shuffle masks
+  // will be {0, 1, -1, -1} and {2, 3, -1, -1}. Use the vpmovsx instruction
+  // to extend v4i32 -> v2i64 and v8i16 -> v4i32, then concatenate the
+  // results back to the original VT.
+
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  EVT OpVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
+      (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
+
+    unsigned NumElems = OpVT.getVectorNumElements();
+    SmallVector<int,8> ShufMask1(NumElems, -1);
+    for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
+
+    SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+                                        ShufMask1.data());
+
+    SmallVector<int,8> ShufMask2(NumElems, -1);
+    for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
+
+    SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+                                        ShufMask2.data());
+
+    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+                                  VT.getVectorNumElements()/2);
+
+    OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+    OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+  }
+  return SDValue();
+}
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
   // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
   //           (and (i32 x86isd::setcc_carry), 1)
   // This eliminates the zext. This transformation is necessary because
   // ISD::SETCC is explicitly lowered to x86isd::setcc_carry.
@@ -14360,6 +14687,8 @@
   DebugLoc dl = N->getDebugLoc();
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
   if (N0.getOpcode() == ISD::AND &&
       N0.hasOneUse() &&
       N0.getOperand(0).hasOneUse()) {
@@ -14374,6 +14703,37 @@
                                    N00.getOperand(0), N00.getOperand(1)),
                        DAG.getConstant(1, VT));
   }
+  // Optimize vectors in AVX mode:
+  //
+  // v8i16 -> v8i32
+  // Use vpunpcklwd for 4 lower elements  v8i16 -> v4i32.
+  // Use vpunpckhwd for 4 upper elements  v8i16 -> v4i32.
+  // Concat upper and lower parts.
+  //
+  // v4i32 -> v4i64
+  // Use vpunpckldq for 4 lower elements  v4i32 -> v2i64.
+  // Use vpunpckhdq for 4 upper elements  v4i32 -> v2i64.
+  // Concat upper and lower parts.
+  //
+  if (Subtarget->hasAVX()) {
+
+    if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
+        ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
+
+      SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
+      SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT,
+                                          N0, ZeroVec, DAG);
+      SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT,
+                                          N0, ZeroVec, DAG);
+
+      EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                                 VT.getVectorNumElements()/2);
+
+      OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+      OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+    }
+  }
+
   return SDValue();
 }
@@ -14490,8 +14850,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
   SDValue Op1 = N->getOperand(1);
 
   // Try to synthesize horizontal adds from adds of shuffles.
-  if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasAVX2() && (VT == MVT::v16i16 || MVT::v8i32))) &&
+  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+       (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
 
@@ -14523,7 +14883,7 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
 
   // Try to synthesize horizontal subs from subs of shuffles.
   EVT VT = N->getValueType(0);
-  if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
       (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
@@ -14539,7 +14899,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::EXTRACT_VECTOR_ELT:
     return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
   case ISD::VSELECT:
-  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
+  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
@@ -14547,7 +14907,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
-  case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
+  case ISD::SRL:            return PerformShiftCombine(N, DAG, DCI, Subtarget);
   case ISD::AND:            return PerformAndCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:             return PerformOrCombine(N, DAG, DCI, Subtarget);
   case ISD::XOR:            return PerformXorCombine(N, DAG, DCI, Subtarget);
@@ -14561,10 +14921,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
-  case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
+  case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG, Subtarget);
+  case ISD::SIGN_EXTEND:    return PerformSExtCombine(N, DAG, DCI, Subtarget);
+  case ISD::TRUNCATE:       return PerformTruncateCombine(N, DAG, DCI);
   case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG);
-  case X86ISD::SHUFPS:      // Handle all target specific shuffles
-  case X86ISD::SHUFPD:
+  case X86ISD::SHUFP:       // Handle all target specific shuffles
   case X86ISD::PALIGN:
   case X86ISD::UNPCKH:
   case X86ISD::UNPCKL:
@@ -14684,11 +15045,38 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
 // X86 Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
+namespace {
+  // Helper to match a sequence of tokens separated by whitespace.
+  bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
+    s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
+
+    for (unsigned i = 0, e = args.size(); i != e; ++i) {
+      StringRef piece(*args[i]);
+      if (!s.startswith(piece)) // Check if the piece matches.
+        return false;
+
+      s = s.substr(piece.size());
+      StringRef::size_type pos = s.find_first_not_of(" \t");
+      if (pos == 0) // We matched a prefix.
+        return false;
+
+      s = s.substr(pos);
+    }
+
+    return s.empty();
+  }
+  const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+}
+
 bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
   InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
 
   std::string AsmStr = IA->getAsmString();
 
+  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+  if (!Ty || Ty->getBitWidth() % 16 != 0)
+    return false;
+
   // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
   SmallVector<StringRef, 4> AsmPieces;
   SplitString(AsmStr, AsmPieces, ";\n");
@@ -14696,35 +15084,27 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
   switch (AsmPieces.size()) {
   default: return false;
   case 1:
-    AsmStr = AsmPieces[0];
-    AsmPieces.clear();
-    SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.
-
     // FIXME: this should verify that we are targeting a 486 or better. If not,
-    // we will turn this bswap into something that will be lowered to logical ops
-    // instead of emitting the bswap asm. For now, we don't support 486 or lower
-    // so don't worry about this.
+    // we will turn this bswap into something that will be lowered to logical
+    // ops instead of emitting the bswap asm. For now, we don't support 486 or
+    // lower so don't worry about this.
     // bswap $0
-    if (AsmPieces.size() == 2 &&
-        (AsmPieces[0] == "bswap" ||
-         AsmPieces[0] == "bswapq" ||
-         AsmPieces[0] == "bswapl") &&
-        (AsmPieces[1] == "$0" ||
-         AsmPieces[1] == "${0:q}")) {
+    if (matchAsm(AsmPieces[0], "bswap", "$0") ||
+        matchAsm(AsmPieces[0], "bswapl", "$0") ||
+        matchAsm(AsmPieces[0], "bswapq", "$0") ||
+        matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
+        matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
+        matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
       // No need to check constraints, nothing other than the equivalent of
       // "=r,0" would be valid here.
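      // (Behavior notes for the matchAsm helper above, derived from
      // matchAsmImpl; the literal pieces are illustrative:
      //   matchAsm("bswap $0",      "bswap",  "$0")  -> true
      //   matchAsm("  bswapl   $0", "bswapl", "$0")  -> true, extra blanks OK
      //   matchAsm("bswapl $0",     "bswap",  "$0")  -> false, each token must
      //                                                 end at whitespace
      //   matchAsm("bswap %eax",    "bswap",  "$0")  -> false
      // Tokens must match in order, separated by spaces or tabs.)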
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; return IntrinsicLowering::LowerToByteSwap(CI); } + // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && - AsmPieces.size() == 3 && - (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && - AsmPieces[1] == "$$8," && - AsmPieces[2] == "${0:w}" && - IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && + (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") || + matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) { AsmPieces.clear(); const std::string &ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); @@ -14733,46 +15113,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && - AsmPieces[3] == "~{fpsr}") { - IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; - return IntrinsicLowering::LowerToByteSwap(CI); - } + AsmPieces[3] == "~{fpsr}") + return IntrinsicLowering::LowerToByteSwap(CI); } break; case 3: if (CI->getType()->isIntegerTy(32) && - IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { - SmallVector<StringRef, 4> Words; - SplitString(AsmPieces[0], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && - Words[2] == "${0:w}") { - Words.clear(); - SplitString(AsmPieces[1], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" && - Words[2] == "$0") { - Words.clear(); - SplitString(AsmPieces[2], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && - Words[2] == "${0:w}") { - AsmPieces.clear(); - const std::string &ConstraintsStr = IA->getConstraintString(); - SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); - std::sort(AsmPieces.begin(), AsmPieces.end()); - if (AsmPieces.size() == 4 && - AsmPieces[0] == "~{cc}" && - AsmPieces[1] == "~{dirflag}" && - AsmPieces[2] == "~{flags}" && - AsmPieces[3] == "~{fpsr}") { - IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; - return IntrinsicLowering::LowerToByteSwap(CI); - } - } - } - } + IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && + matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") && + matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") && + matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) { + AsmPieces.clear(); + const std::string &ConstraintsStr = IA->getConstraintString(); + SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") + return IntrinsicLowering::LowerToByteSwap(CI); } if (CI->getType()->isIntegerTy(64)) { @@ -14781,23 +15141,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 - SmallVector<StringRef, 4> Words; - SplitString(AsmPieces[0], Words, " \t"); - if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { - Words.clear(); - SplitString(AsmPieces[1], Words, " \t"); - if (Words.size() == 2 && 
Words[0] == "bswap" && Words[1] == "%edx") { - Words.clear(); - SplitString(AsmPieces[2], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && - Words[2] == "%edx") { - IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; - return IntrinsicLowering::LowerToByteSwap(CI); - } - } - } + if (matchAsm(AsmPieces[0], "bswap", "%eax") && + matchAsm(AsmPieces[1], "bswap", "%edx") && + matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx")) + return IntrinsicLowering::LowerToByteSwap(CI); } } break; @@ -14892,7 +15239,8 @@ TargetLowering::ConstraintWeight break; case 'x': case 'Y': - if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM()) + if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) || + ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX())) weight = CW_Register; break; case 'I': @@ -14962,9 +15310,9 @@ LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { - if (Subtarget->hasXMMInt()) + if (Subtarget->hasSSE2()) return "Y"; - if (Subtarget->hasXMM()) + if (Subtarget->hasSSE1()) return "x"; } @@ -15170,10 +15518,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (!Subtarget->hasMMX()) break; return std::make_pair(0U, X86::VR64RegisterClass); case 'Y': // SSE_REGS if SSE2 allowed - if (!Subtarget->hasXMMInt()) break; + if (!Subtarget->hasSSE2()) break; // FALL THROUGH. - case 'x': // SSE_REGS if SSE1 allowed - if (!Subtarget->hasXMM()) break; + case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed + if (!Subtarget->hasSSE1()) break; switch (VT.getSimpleVT().SimpleTy) { default: break; @@ -15192,6 +15540,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case MVT::v4f32: case MVT::v2f64: return std::make_pair(0U, X86::VR128RegisterClass); + // AVX types. + case MVT::v32i8: + case MVT::v16i16: + case MVT::v8i32: + case MVT::v4i64: + case MVT::v8f32: + case MVT::v4f64: + return std::make_pair(0U, X86::VR256RegisterClass); + } break; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index cfc1f88..0327b1f 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -219,16 +219,26 @@ namespace llvm { // VZEXT_MOVL - Vector move low and zero extend. VZEXT_MOVL, - // VSHL, VSRL - Vector logical left / right shift. - VSHL, VSRL, + // VSEXT_MOVL - Vector move low and sign extend. + VSEXT_MOVL, - // CMPPD, CMPPS - Vector double/float comparison. - // CMPPD, CMPPS - Vector double/float comparison. - CMPPD, CMPPS, + // VSHL, VSRL - 128-bit vector logical left / right shift + VSHLDQ, VSRLDQ, + + // VSHL, VSRL, VSRA - Vector shift elements + VSHL, VSRL, VSRA, + + // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate + VSHLI, VSRLI, VSRAI, + + // CMPP - Vector packed double/float comparison. + CMPP, // PCMP* - Vector integer comparisons. - PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, - PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, + PCMPEQ, PCMPGT, + + // VPCOM, VPCOMU - XOP Vector integer comparisons. + VPCOM, VPCOMU, // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. 
ADD, SUB, ADC, SBB, SMUL, @@ -256,19 +266,13 @@ namespace llvm { PSHUFD, PSHUFHW, PSHUFLW, - PSHUFHW_LD, - PSHUFLW_LD, - SHUFPD, - SHUFPS, + SHUFP, MOVDDUP, MOVSHDUP, MOVSLDUP, - MOVSHDUP_LD, - MOVSLDUP_LD, MOVLHPS, MOVLHPD, MOVHLPS, - MOVHLPD, MOVLPS, MOVLPD, MOVSD, @@ -279,6 +283,9 @@ namespace llvm { VPERM2X128, VBROADCAST, + // PMULUDQ - Vector multiply packed unsigned doubleword integers + PMULUDQ, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -292,6 +299,9 @@ namespace llvm { // falls back to heap allocation if not. SEG_ALLOCA, + // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui. + WIN_FTOL, + // Memory barrier MEMBARRIER, MFENCE, @@ -361,77 +371,6 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace X86 { - /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFDMask(ShuffleVectorSDNode *N); - - /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFHWMask(ShuffleVectorSDNode *N); - - /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to PSHUFD. - bool isPSHUFLWMask(ShuffleVectorSDNode *N); - - /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to SHUFP*. - bool isSHUFPMask(ShuffleVectorSDNode *N); - - /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVHLPS. - bool isMOVHLPSMask(ShuffleVectorSDNode *N); - - /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form - /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, - /// <2, 3, 2, 3> - bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N); - - /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVLP{S|D}. - bool isMOVLPMask(ShuffleVectorSDNode *N); - - /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. - /// as well as MOVLHPS. - bool isMOVLHPSMask(ShuffleVectorSDNode *N); - - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, - bool V2IsSplat = false); - - /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, - bool V2IsSplat = false); - - /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form - /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef, - /// <0, 0, 1, 1> - bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N); - - /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form - /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. 
vector_shuffle v, undef, - /// <2, 2, 3, 3> - bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N); - - /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVSS, - /// MOVSD, and MOVD, i.e. setting the lowest element. - bool isMOVLMask(ShuffleVectorSDNode *N); - - /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVSHDUP. - bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); - - /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVSLDUP. - bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget); - - /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a shuffle of elements that is suitable for input to MOVDDUP. - bool isMOVDDUPMask(ShuffleVectorSDNode *N); - /// isVEXTRACTF128Index - Return true if the specified /// EXTRACT_SUBVECTOR operand specifies a vector extract that is /// suitable for input to VEXTRACTF128. @@ -442,19 +381,6 @@ namespace llvm { /// suitable for input to VINSERTF128. bool isVINSERTF128Index(SDNode *N); - /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle - /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* - /// instructions. - unsigned getShuffleSHUFImmediate(SDNode *N); - - /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle - /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction. - unsigned getShufflePSHUFHWImmediate(SDNode *N); - - /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle - /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction. - unsigned getShufflePSHUFLWImmediate(SDNode *N); - /// getExtractVEXTRACTF128Immediate - Return the appropriate /// immediate to extract the specified EXTRACT_SUBVECTOR index /// with VEXTRACTF128 instructions. @@ -688,6 +614,18 @@ namespace llvm { (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } + /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine + /// for fptoui. + bool isTargetFTOL() const { + return Subtarget->isTargetWindows() && !Subtarget->is64Bit(); + } + + /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be + /// used for fptoui to the given type. + bool isIntegerTypeFTOL(EVT VT) const { + return isTargetFTOL() && VT == MVT::i64; + } + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; @@ -770,7 +708,8 @@ namespace llvm { SelectionDAG &DAG) const; std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool isSigned) const; + bool isSigned, + bool isReplace) const; SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG) const; @@ -824,6 +763,7 @@ namespace llvm { SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; @@ -837,6 +777,7 @@ namespace llvm { SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const; // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; @@ -848,8 +789,8 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -907,7 +848,7 @@ namespace llvm { unsigned cxchgOpc, unsigned notOpc, unsigned EAXreg, - TargetRegisterClass *RC, + const TargetRegisterClass *RC, bool invSrc = false) const; MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index dd4f6a5..54b91c3 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -1,4 +1,4 @@ -//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===// +//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index c99c52d..7fa7499 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1,10 +1,10 @@ -//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===// -// +//===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the integer arithmetic instructions in the X86 @@ -18,22 +18,24 @@ let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), - "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; + "lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; + [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + Requires<[In32BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, + Requires<[In64BitMode]>; let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", - [(set GR64:$dst, lea64addr:$src)]>; + [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; @@ -56,16 +58,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*GR16 + [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", // EAX,EDX = EAX*GR32 - [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], + IIC_MUL32_REG>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), "mul{q}\t$src", // RAX,RDX = RAX*GR64 - [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], + IIC_MUL64>; let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), @@ -74,21 +78,21 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
[(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>; // AL,AH = AL*[mem8] + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*[mem16] + [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - []>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] } let neverHasSideEffects = 1 in { @@ -130,16 +134,19 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; + (X86smul_flag GR16:$src1, GR16:$src2))], IIC_IMUL16_RR>, + TB, OpSize; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, GR32:$src2))]>, TB; + (X86smul_flag GR32:$src1, GR32:$src2))], IIC_IMUL32_RR>, + TB; def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; + (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, + TB; } // Register-Memory Signed Integer Multiply @@ -147,18 +154,23 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, (load addr:$src2)))]>, + (X86smul_flag GR16:$src1, (load addr:$src2)))], + IIC_IMUL16_RM>, TB, OpSize; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB; + (X86smul_flag GR32:$src1, (load addr:$src2)))], + IIC_IMUL32_RM>, + TB; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; + (X86smul_flag GR64:$src1, (load addr:$src2)))], + IIC_IMUL64_RM>, + TB; } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] @@ -170,33 +182,39 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize; + (X86smul_flag GR16:$src1, imm:$src2))], + IIC_IMUL16_RRI>, OpSize; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, + (X86smul_flag GR16:$src1, i16immSExt8:$src2))], + IIC_IMUL16_RRI>, OpSize; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, imm:$src2))]>; + (X86smul_flag GR32:$src1, 
imm:$src2))], + IIC_IMUL32_RRI>; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>; + (X86smul_flag GR32:$src1, i32immSExt8:$src2))], + IIC_IMUL32_RRI>; def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; + (X86smul_flag GR64:$src1, i64immSExt32:$src2))], + IIC_IMUL64_RRI>; def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; + (X86smul_flag GR64:$src1, i64immSExt8:$src2))], + IIC_IMUL64_RRI>; // Memory-Integer Signed Integer Multiply @@ -204,37 +222,43 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), imm:$src2))]>, + (X86smul_flag (load addr:$src1), imm:$src2))], + IIC_IMUL16_RMI>, OpSize; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i16immSExt8:$src2))]>, OpSize; + i16immSExt8:$src2))], IIC_IMUL16_RMI>, + OpSize; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), imm:$src2))]>; + (X86smul_flag (load addr:$src1), imm:$src2))], + IIC_IMUL32_RMI>; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i32immSExt8:$src2))]>; + i32immSExt8:$src2))], + IIC_IMUL32_RMI>; def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i64immSExt32:$src2))]>; + i64immSExt32:$src2))], + IIC_IMUL64_RMI>; def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (load addr:$src1), - i64immSExt8:$src2))]>; + i64immSExt8:$src2))], + IIC_IMUL64_RMI>; } // Defs = [EFLAGS] @@ -243,62 +267,62 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "div{b}\t$src", []>; + "div{b}\t$src", [], IIC_DIV8_REG>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "div{w}\t$src", []>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "div{l}\t$src", []>; + "div{l}\t$src", [], IIC_DIV32>; // RDX:RAX/r64 = RAX,RDX let Defs = 
[RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), - "div{q}\t$src", []>; + "div{q}\t$src", [], IIC_DIV64>; let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", []>; + "div{b}\t$src", [], IIC_DIV8_MEM>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", []>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", []>; + "div{l}\t$src", [], IIC_DIV32>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", []>; + "div{q}\t$src", [], IIC_DIV64>; } // Signed division/remainder. let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH - "idiv{b}\t$src", []>; + "idiv{b}\t$src", [], IIC_IDIV8>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX - "idiv{w}\t$src", []>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX - "idiv{l}\t$src", []>; + "idiv{l}\t$src", [], IIC_IDIV32>; // RDX:RAX/r64 = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), - "idiv{q}\t$src", []>; + "idiv{q}\t$src", [], IIC_IDIV64>; let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", []>; + "idiv{b}\t$src", [], IIC_IDIV8>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", []>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", []>; + "idiv{l}\t$src", [], IIC_IDIV32>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", []>; + "idiv{q}\t$src", [], IIC_IDIV64>; } //===----------------------------------------------------------------------===// @@ -312,35 +336,35 @@ let Constraints = "$src1 = $dst" in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "neg{w}\t$dst", [(set GR16:$dst, (ineg GR16:$src1)), - (implicit EFLAGS)]>, OpSize; + (implicit EFLAGS)], IIC_UNARY_REG>, OpSize; def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "neg{l}\t$dst", [(set GR32:$dst, (ineg GR32:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_REG>; } // Constraints = "$src1 = $dst" def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], 
IIC_UNARY_MEM>; def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst", [(store (ineg (loadi16 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, OpSize; + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize; def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst", [(store (ineg (loadi32 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; } // Defs = [EFLAGS] @@ -351,29 +375,30 @@ let Constraints = "$src1 = $dst" in { let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src1))]>; + [(set GR8:$dst, (not GR8:$src1))], IIC_UNARY_REG>; def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src1))]>, OpSize; + [(set GR16:$dst, (not GR16:$src1))], IIC_UNARY_REG>, OpSize; def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src1))]>; + [(set GR32:$dst, (not GR32:$src1))], IIC_UNARY_REG>; def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", - [(set GR64:$dst, (not GR64:$src1))]>; + [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } } // Constraints = "$src1 = $dst" def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", - [(store (not (loadi8 addr:$dst)), addr:$dst)]>; + [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst", - [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; + [(store (not (loadi16 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>, + OpSize; def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst", - [(store (not (loadi32 addr:$dst)), addr:$dst)]>; + [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", - [(store (not (loadi64 addr:$dst)), addr:$dst)]>; + [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. @@ -382,19 +407,22 @@ let Constraints = "$src1 = $dst" in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))], + IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In32BitMode]>; def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))], + IIC_UNARY_REG>; } // isConvertibleToThreeAddress = 1, CodeSize = 1 @@ -403,19 +431,23 @@ let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can transform into LEA. 
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In64BitMode]>; def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 @@ -424,37 +456,37 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), let CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In32BitMode]>; def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In32BitMode]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. // FIXME: What is this for?? def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In64BitMode]>; def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In64BitMode]>; def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; } // CodeSize = 2 @@ -462,18 +494,22 @@ let Constraints = "$src1 = $dst" in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))], + IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], + IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], + IIC_UNARY_REG>, Requires<[In32BitMode]>; def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>; + [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], + IIC_UNARY_REG>; } // CodeSize = 2 } // Constraints = "$src1 = $dst" @@ -481,18 +517,18 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", let CodeSize = 2 in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize, Requires<[In32BitMode]>; def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, + (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In32BitMode]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; + (implicit EFLAGS)], IIC_UNARY_MEM>; } // CodeSize = 2 } // Defs = [EFLAGS] @@ -588,11 +624,13 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, /// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations) /// or 1 (for i16,i32,i64 operations). class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, - string mnemonic, string args, list<dag> pattern> + string mnemonic, string args, list<dag> pattern, + InstrItinClass itin = IIC_BIN_NONMEM> : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, f, outs, ins, - !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> { + !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern, + itin> { // Infer instruction prefixes from type info. let hasOpSizePrefix = typeinfo.HasOpSizePrefix; @@ -664,7 +702,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -776,7 +814,7 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; // BinOpMR_RMW - Instructions like "add [mem], reg". 
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -804,7 +842,7 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { let ImmT = typeinfo.ImmEncoding; } @@ -837,7 +875,7 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -1150,7 +1188,7 @@ let Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", []>; + "", [], IIC_BIN_NONMEM>; } //===----------------------------------------------------------------------===// @@ -1160,11 +1198,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag> { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>; + [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))], + IIC_BIN_NONMEM>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>; + (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 3a43b22..adeaf54 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -1,10 +1,10 @@ -//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===// -// +//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the X86 conditional move and set on condition @@ -21,17 +21,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize; + (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))], + IIC_CMOV16_RR>,TB,OpSize; def #NAME#32rr : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB; + (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))], + IIC_CMOV32_RR>, TB; def #NAME#64rr :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, - (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB; + (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))], + IIC_CMOV32_RR>, TB; } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { @@ -39,17 +42,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - CondNode, EFLAGS))]>, TB, OpSize; + CondNode, EFLAGS))], IIC_CMOV16_RM>, + TB, OpSize; def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - CondNode, EFLAGS))]>, TB; + CondNode, EFLAGS))], IIC_CMOV32_RM>, TB; def #NAME#64rm :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - CondNode, EFLAGS))]>, TB; + CondNode, EFLAGS))], IIC_CMOV32_RM>, TB; } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" } // end multiclass @@ -78,10 +82,12 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { let Uses = [EFLAGS] in { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), - [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB; + [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], + IIC_SET_R>, TB; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), - [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB; + [(store (X86setcc OpNode, EFLAGS), addr:$dst)], + IIC_SET_M>, TB; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index e0cf669..ac49232 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -125,10 +125,26 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), [(set GR64:$dst, (X86SegAlloca GR64:$size))]>, Requires<[In64BitMode]>; - } +// The MSVC runtime contains an _ftol2 routine for converting floating-point +// to integer values. It has a strange calling convention: the input is +// popped from the x87 stack, and the return value is given in EDX:EAX. No +// other registers (aside from flags) are touched. +// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 +// variant is unnecessary. 
+ +let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { + def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), + "# win32 fptoui", + [(X86WinFTOL RFP32:$src)]>, + Requires<[In32BitMode]>; + def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src), + "# win32 fptoui", + [(X86WinFTOL RFP64:$src)]>, + Requires<[In32BitMode]>; +} //===----------------------------------------------------------------------===// // EH Pseudo Instructions @@ -551,7 +567,7 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), // Memory barriers // TODO: Get this to fold the constant into the instruction. -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", @@ -562,15 +578,6 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", [(X86MemBarrier)]>; -// TODO: Get this to fold the constant into the instruction. -let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in -def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), - "lock\n\t" - "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", - [(X86MemBarrierNoSSE GR64:$zero)]>, - Requires<[In64BitMode]>, LOCK; - - // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction // ImmOpc8 corresponds to the mi8 version of the instruction @@ -954,14 +961,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; -def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; - -def : Pat<(X86call (i64 tglobaladdr:$dst)), - (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; + (CALL64pcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i64 texternalsym:$dst)), - (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; + (CALL64pcrel32 texternalsym:$dst)>; // tailcall stuff def : Pat<(X86tcret GR32_TC:$dst, imm:$off), @@ -1458,58 +1460,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +// Helper imms that check if a mask doesn't change significant shift bits. 
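The immShift32/immShift64 leaves defined immediately below encode a hardware fact: 32-bit x86 shifts read only the low 5 bits of CL, and 64-bit shifts only the low 6, so an AND whose mask preserves those bits changes nothing. A standalone C++ restatement of the predicate (helper names are illustrative; the patch itself uses LLVM's CountTrailingOnes_32):

    #include <cstdint>

    // Counts contiguous one bits from bit 0 upward, as CountTrailingOnes_32 does.
    static int countTrailingOnes(uint32_t v) {
      int n = 0;
      while (v & 1) { ++n; v >>= 1; }
      return n;
    }

    // A shift-count mask may be dropped when it keeps every bit the shifter
    // reads: the low 5 bits for 32-bit shifts (masks 0x1f, 0x3f, 0xff, ...),
    // the low 6 for 64-bit shifts.
    static bool masksAllShiftBits(uint8_t imm, bool is64Bit) {
      return countTrailingOnes(imm) >= (is64Bit ? 6 : 5);
    }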
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>; +def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>; + // (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL, 31)), +def : Pat<(shl GR8:$src1, (and CL, immShift32)), (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL, 31)), +def : Pat<(shl GR16:$src1, (and CL, immShift32)), (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL, 31)), +def : Pat<(shl GR32:$src1, (and CL, immShift32)), (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst), (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst), (SHL16mCL addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst), (SHL32mCL addr:$dst)>; -def : Pat<(srl GR8:$src1, (and CL, 31)), +def : Pat<(srl GR8:$src1, (and CL, immShift32)), (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL, 31)), +def : Pat<(srl GR16:$src1, (and CL, immShift32)), (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL, 31)), +def : Pat<(srl GR32:$src1, (and CL, immShift32)), (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst), (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst), (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst), (SHR32mCL addr:$dst)>; -def : Pat<(sra GR8:$src1, (and CL, 31)), +def : Pat<(sra GR8:$src1, (and CL, immShift32)), (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL, 31)), +def : Pat<(sra GR16:$src1, (and CL, immShift32)), (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL, 31)), +def : Pat<(sra GR32:$src1, (and CL, immShift32)), (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst), (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst), (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst), (SAR32mCL addr:$dst)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL, 63)), +def : Pat<(shl GR64:$src1, (and CL, immShift64)), (SHL64rCL GR64:$src1)>; def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL, 63)), +def : Pat<(srl GR64:$src1, (and CL, immShift64)), (SHR64rCL GR64:$src1)>; def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL, 63)), +def : Pat<(sra GR64:$src1, (and CL, immShift64)), (SAR64rCL GR64:$src1)>; def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; @@ -1753,3 +1759,11 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2), (AND64ri8 GR64:$src1, 
i64immSExt8:$src2)>; def : Pat<(and GR64:$src1, i64immSExt32:$src2), (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Bit scan instruction patterns to match explicit zero-undef behavior. +def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; +def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; +def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>; +def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>; +def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>; +def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index c228a0a..ba86098 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -1,4 +1,4 @@ -//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===// +//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -20,39 +20,42 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", - [(X86retflag 0)]>; + [(X86retflag 0)], IIC_RET>; def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", - [(X86retflag timm:$amt)]>; + [(X86retflag timm:$amt)], IIC_RET_IMM>; def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "retw\t$amt", - []>, OpSize; + [], IIC_RET_IMM>, OpSize; def LRETL : I <0xCB, RawFrm, (outs), (ins), - "lretl", []>; + "lretl", [], IIC_RET>; def LRETQ : RI <0xCB, RawFrm, (outs), (ins), - "lretq", []>; + "lretq", [], IIC_RET>; def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", []>; + "lret\t$amt", [], IIC_RET>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lretw\t$amt", []>, OpSize; + "lretw\t$amt", [], IIC_RET>, OpSize; } // Unconditional branches. let isBarrier = 1, isBranch = 1, isTerminator = 1 in { def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp\t$dst", [(br bb:$dst)]>; + "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>; def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst", []>; + "jmp\t$dst", [], IIC_JMP_REL>; + // FIXME: Add Intel syntax for JMP64pcrel32 such that it is not ambiguous + // with JMP_1. def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp{q}\t$dst", []>; + "jmpq\t$dst", [], IIC_JMP_REL>; } // Conditional Branches. let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { - def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [], + IIC_Jcc>; def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, - [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB; } } @@ -74,61 +77,61 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; // jcx/jecx/jrcx instructions. -let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1 in { // These are the 32-bit versions of this instruction for the asmparser. In // 32-bit mode, the address size prefix is jcxz and the unprefixed version is // jecxz.
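Stepping back to the cttz_zero_undef patterns added earlier in this hunk set: BSF leaves its destination undefined when the source is zero, which is exactly the contract cttz_zero_undef models, so the instruction can be selected without a guarding zero test. A hedged C++ illustration of the source-level shape that produces such nodes (not from the patch):

    #include <cstdint>

    // __builtin_ctz is undefined for a zero argument; the frontend turns it
    // into llvm.cttz with the zero-is-undef flag set, which these patterns
    // can select to a bare BSF with no branch around it.
    unsigned trailing_zeros(uint32_t x) {
      return __builtin_ctz(x);  // caller must guarantee x != 0
    }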
let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>; + "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>; let Uses = [ECX] in def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jecxz\t$dst", []>, Requires<[In32BitMode]>; + "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>; // J*CXZ instruction: 64-bit versions of this instruction for the asmparser. // In 64-bit mode, the address size prefix is jecxz and the unprefixed version // is jrcxz. let Uses = [ECX] in def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>; + "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>; let Uses = [RCX] in def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jrcxz\t$dst", []>, Requires<[In64BitMode]>; + "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>; } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>, Requires<[In32BitMode]>; + [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>, Requires<[In64BitMode]>; + [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; + [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>; def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", []>; + "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", []>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", []>, OpSize; + "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), - "ljmp{l}\t{*}$dst", []>; + "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>; } // Loop instructions -def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; +def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>; +def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>; +def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>; //===----------------------------------------------------------------------===// // Call Instructions... @@ -138,32 +141,30 @@ let isCall = 1 in // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. Uses for argument // registers are added manually. 
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { + let Uses = [ESP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), - "call{l}\t$dst", []>, Requires<[In32BitMode]>; + "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), - "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, + "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), - "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>, + "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, Requires<[In32BitMode]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; + "lcall{w}\t{$seg, $off|$off, $seg}", [], + IIC_CALL_FAR_PTR>, OpSize; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "lcall{l}\t{$seg, $off|$off, $seg}", []>; + "lcall{l}\t{$seg, $off|$off, $seg}", [], + IIC_CALL_FAR_PTR>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", []>, OpSize; + "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", []>; + "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>; // callw for 16 bit code for the assembler. let isAsmParserOnly = 1 in @@ -177,11 +178,7 @@ let isCall = 1 in let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in - let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { + let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>; def TCRETURNri : PseudoI<(outs), @@ -194,74 +191,43 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, // mcinst. def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), (ins i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", - []>; + "jmp\t$dst # TAILCALL", + [], IIC_JMP_REL>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "", []>; // FIXME: Remove encoding when JIT is dead. + "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", []>; + "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } //===----------------------------------------------------------------------===// // Call Instructions... // -let isCall = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. 
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { - - // NOTE: this pattern doesn't match "X86call imm", because we do not know - // that the offset between an arbitrary immediate and the call will fit in - // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, - Requires<[In64BitMode, NotWin64]>; - def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[In64BitMode, NotWin64]>; - def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, - Requires<[In64BitMode, NotWin64]>; - def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", []>; - } +// RSP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. Uses for argument +// registers are added manually. +let isCall = 1, Uses = [RSP] in { + // NOTE: this pattern doesn't match "X86call imm", because we do not know + // that the offset between an arbitrary immediate and the call will fit in + // the 32-bit pcrel field that we have. + def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", [], IIC_CALL_RI>, + Requires<[In64BitMode]>; + def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call{q}\t{*}$dst", [(X86call GR64:$dst)], + IIC_CALL_RI>, + Requires<[In64BitMode]>; + def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], + IIC_CALL_MEM>, + Requires<[In64BitMode]>; - // FIXME: We need to teach codegen about single list of call-clobbered - // registers. -let isCall = 1, isCodeGenOnly = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. - let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], - Uses = [RSP] in { - def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, - Requires<[IsWin64]>; - def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call{q}\t{*}$dst", - [(X86call GR64:$dst)]>, Requires<[IsWin64]>; - def WINCALL64m : I<0xFF, MRM2m, (outs), - (ins i64mem:$dst,variable_ops), - "call{q}\t{*}$dst", - [(X86call (loadi64 addr:$dst))]>, - Requires<[IsWin64]>; - } + def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), + "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; +} let isCall = 1, isCodeGenOnly = 1 in // __chkstk(MSVC): clobber R10, R11 and EFLAGS. 
@@ -270,18 +236,13 @@ let isCall = 1, isCodeGenOnly = 1 in Uses = [RSP] in { def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, + "call{q}\t$dst", [], IIC_CALL_RI>, Requires<[IsWin64]>; } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in - // AMD64 cc clobbers RSI, RDI, XMM6-XMM15. - let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], - Uses = [RSP], + let Uses = [RSP], usesCustomInserter = 1 in { def TCRETURNdi64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), @@ -294,11 +255,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", []>; + "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>; def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index e62e6b7..0d5490a 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -1,10 +1,10 @@ -//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===// -// +//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the sign and zero extension operations. 
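Before the extension hunks below, the two operations this file implements, each a single movsx/movzx on x86 (illustrative C++, identifiers ours):

    #include <cstdint>

    int32_t  sign_extend(int8_t b)  { return b; }  // movsbl: copies of the sign bit fill the top
    uint32_t zero_extend(uint8_t b) { return b; }  // movzbl: zeros fill the top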
@@ -37,40 +37,47 @@ let neverHasSideEffects = 1 in { } + // Sign/Zero extenders def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, + TB, OpSize; def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, + TB, OpSize; def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))]>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))]>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB; + [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, + TB; def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, + TB, OpSize; def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), - "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, + TB, OpSize; def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))]>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))]>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; + [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, + TB; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -78,12 +85,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB; + [], IIC_MOVZX>, TB; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB; + [], IIC_MOVZX>, TB; // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register @@ -91,32 +98,38 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. 
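One fact the isCodeGenOnly MOVZX64 definitions below lean on: writing any 32-bit register implicitly clears bits 63:32, so a 32-bit movzx already performs a 64-bit zero-extension in a smaller encoding. A sketch (identifiers ours):

    #include <cstdint>

    // Zero-extending to 64 bits needs only the 32-bit form: the movzbl
    // write to the low register half clears the upper 32 bits for free,
    // which is why the MOVZX64 pseudos can use the 32-bit encodings.
    uint64_t zext8_to_64(uint8_t b) {
      uint32_t w = b;   // movzbl
      return w;         // already a valid 64-bit zero-extension
    }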
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))]>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; + [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, + TB; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))]>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; + [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, + TB; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))]>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, + TB; // FIXME: These should be Pat patterns. let isCodeGenOnly = 1 in { @@ -124,15 +137,17 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))]>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, + TB; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))]>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; + "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], + IIC_MOVZX>, TB; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. 
The preferred way to do 32-bit-to-64-bit zero @@ -142,10 +157,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))]>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; - - + "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], + IIC_MOVZX>; } diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index f443088..d57937b 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -1,4 +1,4 @@ -//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -15,7 +15,7 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -multiclass fma_rm<bits<8> opc, string OpcodeStr> { +multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -34,415 +34,187 @@ multiclass fma_rm<bits<8> opc, string OpcodeStr> { []>; } -multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpcodeStr, string PackTy> { - defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; - defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; +multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr, string PackTy> { + defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; + defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; + defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; } -let isAsmParserOnly = 1 in { - // Fused Multiply-Add - defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; - defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; - defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; - defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; - defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; - defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; - defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; - defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; +// Fused Multiply-Add +let ExeDomain = SSEPackedSingle in { + defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; + defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; + defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; +} + +let ExeDomain = SSEPackedDouble in { + defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; + defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; +} + +// Fused Negative Multiply-Add +let ExeDomain = 
SSEPackedSingle in { + defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; + defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; +} +let ExeDomain = SSEPackedDouble in { + defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +} + +multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} - // Fused Negative Multiply-Add - defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; - defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; - defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; - defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; +multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpcodeStr> { + defm SSr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132ss"), f32mem>; + defm SSr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213ss"), f32mem>; + defm SSr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231ss"), f32mem>; + defm SDr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132sd"), f64mem>, VEX_W; + defm SDr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213sd"), f64mem>, VEX_W; + defm SDr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231sd"), f64mem>, VEX_W; } +defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">, VEX_LIG; +defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">, VEX_LIG; + +defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">, VEX_LIG; +defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">, VEX_LIG; + //===----------------------------------------------------------------------===// // FMA4 - AMD 4 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -multiclass fma4s<bits<8> opc, string OpcodeStr> { +multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop, + ComplexPattern mem_cpat, Intrinsic Int> { def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), + (ins VR128:$src1, VR128:$src2, memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2, VR128:$src3), + (ins VR128:$src1, memop:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>; - + [(set VR128:$dst, + (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>; +// For disassembler +let isCodeGenOnly = 1 in + def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; } -multiclass fma4p<bits<8> opc, string OpcodeStr> { +multiclass fma4p<bits<8> opc, string OpcodeStr, + Intrinsic Int128, Intrinsic 
Int256, + PatFrag ld_frag128, PatFrag ld_frag256> { def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR128:$dst, + (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4; def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, + (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4; def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>; + [(set VR128:$dst, + (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>; def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR256:$dst, + (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4; def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_W; + [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, + (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4; def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>; -} - -let isAsmParserOnly = 1 in { - defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">; - defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">; - defm VFMADDPS4 : fma4p<0x68, "vfmaddps">; - defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">; - defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">; - defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">; - defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">; - defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">; - defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">; - defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">; - defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">; - defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">; - defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">; - defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">; - defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">; - defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">; - defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">; - defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">; - defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">; - defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">; + [(set VR256:$dst, + (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>; +// For disassembler +let isCodeGenOnly = 1 in { + def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; + def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>; +} // isCodeGenOnly = 1 } -// FMA4 Intrinsics patterns - -// VFMADD -def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), 
- VR128:$src3), - (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -// VFMSUB -def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFMSUBPS4mr VR128:$src1, 
addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -// VFNMADD -def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFNMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFNMADDPS4mrY 
VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -// VFNMSUB -def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -// VFMADDSUB -def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 
addr:$src2), - VR128:$src3), - (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -// VFMSUBADD -def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, - (alignedloadv4f32 addr:$src3)), - (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), - VR128:$src3), - (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3), - (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, - (alignedloadv2f64 addr:$src3)), - (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), - VR128:$src3), - (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; - -def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, - (alignedloadv8f32 addr:$src3)), - (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, - (alignedloadv8f32 addr:$src2), - VR256:$src3), - (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; - -def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), - (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, - (alignedloadv4f64 addr:$src3)), - (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; -def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, - (alignedloadv4f64 addr:$src2), - VR256:$src3), - (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; +defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma4_vfmadd_ss>; +defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma4_vfmadd_sd>; +defm VFMADDPS4 : fma4p<0x68, "vfmaddps", 
int_x86_fma4_vfmadd_ps, + int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>; +defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd, + int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>; +defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma4_vfmsub_ss>; +defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma4_vfmsub_sd>; +defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps, + int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>; +defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd, + int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>; +defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma4_vfnmadd_ss>; +defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma4_vfnmadd_sd>; +defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps, + int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>; +defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd, + int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>; +defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma4_vfnmsub_ss>; +defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma4_vfnmsub_sd>; +defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps, + int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>; +defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd, + int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>; +defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps, + int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>; +defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd, + int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>; +defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps, + int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>; +defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd, + int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 7cb870f..a13887e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -1,10 +1,10 @@ -//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=// -// +//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 x87 FPU instruction set, defining the @@ -225,22 +225,22 @@ class FPrST0PInst<bits<8> o, string asm> // of some of the 'reverse' forms of the fsub and fdiv instructions. As such, // we have to put some 'r's in and take them out of weird places. 
def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">; -def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">; +def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, ST(0)}">; def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">; def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">; -def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">; +def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, ST(0)}">; def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">; def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">; -def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">; +def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, ST(0)}">; def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">; def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">; -def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">; +def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, ST(0)}">; def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">; def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">; -def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">; +def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, ST(0)}">; def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">; def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">; -def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">; +def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, ST(0)}">; def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">; def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">; @@ -330,21 +330,21 @@ defm CMOVNP : FPCMov<X86_COND_NP>; let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), - "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA; + "fcmovb\t{$op, %st(0)|ST(0), $op}">, DA; def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), - "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA; + "fcmovbe\t{$op, %st(0)|ST(0), $op}">, DA; def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), - "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA; + "fcmove\t{$op, %st(0)|ST(0), $op}">, DA; def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), - "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA; + "fcmovu\t {$op, %st(0)|ST(0), $op}">, DA; def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins), - "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB; + "fcmovnb\t{$op, %st(0)|ST(0), $op}">, DB; def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins), - "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB; + "fcmovnbe\t{$op, %st(0)|ST(0), $op}">, DB; def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins), - "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB; + "fcmovne\t{$op, %st(0)|ST(0), $op}">, DB; def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), - "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB; + "fcmovnu\t{$op, %st(0)|ST(0), $op}">, DB; } // Predicates = [HasCMov] // Floating point loads & stores. @@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">; } // FISTTP requires SSE3 even though it's a FPStack op. 
+let Predicates = [HasSSE3] in { def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i16mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i16mem RFP32:$src, addr:$op)]>; def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i32mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i32mem RFP32:$src, addr:$op)]>; def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, - [(X86fp_to_i64mem RFP32:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i64mem RFP32:$src, addr:$op)]>; def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i16mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i16mem RFP64:$src, addr:$op)]>; def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i32mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i32mem RFP64:$src, addr:$op)]>; def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, - [(X86fp_to_i64mem RFP64:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i64mem RFP64:$src, addr:$op)]>; def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i16mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i16mem RFP80:$src, addr:$op)]>; def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i32mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i32mem RFP80:$src, addr:$op)]>; def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, - [(X86fp_to_i64mem RFP80:$src, addr:$op)]>, - Requires<[HasSSE3]>; + [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; +} // Predicates = [HasSSE3] let mayStore = 1 in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 7ba3639..b387090 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -1,10 +1,10 @@ -//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===// -// +//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -43,6 +43,15 @@ def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; def MRM_D0 : Format<45>; def MRM_D1 : Format<46>; +def MRM_D4 : Format<47>; +def MRM_D8 : Format<48>; +def MRM_D9 : Format<49>; +def MRM_DA : Format<50>; +def MRM_DB : Format<51>; +def MRM_DC : Format<52>; +def MRM_DD : Format<53>; +def MRM_DE : Format<54>; +def MRM_DF : Format<55>; // ImmType - This specifies the immediate type used by an instruction. 
This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -120,10 +129,12 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } -class XOP_W { bit hasXOP_WPrefix = 1; } +class MemOp4 { bit hasMemOp4Prefix = 1; } class XOP { bit hasXOP_Prefix = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, - string AsmStr, Domain d = GenericDomain> + string AsmStr, + InstrItinClass itin, + Domain d = GenericDomain> : Instruction { let Namespace = "X86"; @@ -139,6 +150,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, // If this is a pseudo instruction, mark it isCodeGenOnly. let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); + let Itinerary = itin; + // // Attributes specific to X86 instructions... // @@ -161,7 +174,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? - bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands + bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix? // TSFlags layout should be kept in sync with X86InstrInfo.h. @@ -184,56 +197,58 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{38} = hasVEX_L; let TSFlags{39} = ignoresVEX_L; let TSFlags{40} = has3DNow0F0FOpcode; - let TSFlags{41} = hasXOP_WPrefix; + let TSFlags{41} = hasMemOp4Prefix; let TSFlags{42} = hasXOP_Prefix; } class PseudoI<dag oops, dag iops, list<dag> pattern> - : X86Inst<0, Pseudo, NoImm, oops, iops, ""> { + : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> { let Pattern = pattern; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, Domain d = GenericDomain> - : X86Inst<o, f, NoImm, outs, ins, asm, d> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + Domain d = GenericDomain> + : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, Domain d = GenericDomain> - : X86Inst<o, f, Imm8, outs, ins, asm, d> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + Domain d = GenericDomain> + : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm16, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm32, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, 
f, Imm16PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } @@ -244,8 +259,9 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> : I<o, F, outs, ins, asm, []> {} // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. -class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> - : X86Inst<0, Pseudo, NoImm, outs, ins, ""> { +class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, + InstrItinClass itin = IIC_DEFAULT> + : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> { let FPForm = fp; let Pattern = pattern; } @@ -257,20 +273,23 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern> // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } // SI - SSE 1 & 2 scalar instructions -class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern> { +class SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); @@ -280,8 +299,8 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> // SIi8 - SSE 1 & 2 scalar instructions class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern> { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); @@ -291,8 +310,8 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // PI - SSE 1 & 2 packed instructions class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, - Domain d> - : I<o, F, outs, ins, asm, pattern, d> { + InstrItinClass itin, Domain d> + : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); @@ -302,8 +321,8 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, // PIi8 - SSE 1 & 2 packed instructions with immediate class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, Domain d> - : Ii8<o, F, outs, ins, asm, pattern, d> { + list<dag> pattern, InstrItinClass itin, Domain d> + : Ii8<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); @@ -319,30 +338,28 @@ class 
PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // VSSI - SSE1 instructions with XS prefix in AVX form. // VPSI - SSE1 instructions with TB prefix in AVX form. -class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; +class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; -class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>; +class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[HasSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[HasSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB, Requires<[HasAVX]>; -class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, - Requires<[HasXMM]>; // SSE2 Instruction Templates: // @@ -354,28 +371,30 @@ class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // VSDI - SSE2 instructions with XD prefix in AVX form. // VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. 
-class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; +class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; -class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, +class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: @@ -385,15 +404,16 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // S3DI - SSE3 instructions with XD prefix. class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, Requires<[HasSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, Requires<[HasSSE3]>; -class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, +class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE3]>; @@ -403,16 +423,16 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> // SS3AI - SSSE3 instructions with TA prefix. // // Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version -// uses the MMX registers. We put those instructions here because they better -// fit into the SSSE3 instruction category rather than the MMX category. +// uses the MMX registers. 
The 64-bit versions are grouped with the MMX +// classes. They need to be enabled even if AVX is enabled. class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; // SSE4.1 Instruction Templates: @@ -421,31 +441,31 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8. // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSE41]>; // SSE4.2 Instruction Templates: // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>; - + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; + // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSE42]>; // AVX Instruction Templates: @@ -454,12 +474,12 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX8I - AVX instructions with T8 and OpSize prefix. // AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX]>; class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX]>; // AVX2 Instruction Templates: @@ -468,12 +488,12 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX28I - AVX2 instructions with T8 and OpSize prefix. // AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. 
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX2]>; class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX2]>; // AES Instruction Templates: @@ -481,87 +501,88 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, - Requires<[HasAES]>; + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, + Requires<[HasSSE2, HasAES]>; class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, - Requires<[HasAES]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, + Requires<[HasSSE2, HasAES]>; // CLMUL Instruction Templates class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, - OpSize, Requires<[HasCLMUL]>; + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, + OpSize, Requires<[HasSSE2, HasCLMUL]>; class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, T8, OpSize, VEX_4V, Requires<[HasFMA3]>; // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP9, Requires<[HasXOP]>; // XOP 2, 3 and 4 Operand Instruction Templates with imm byte class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP8, Requires<[HasXOP]>; // XOP 5 operand instruction (VEX encoding!) 
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern> - : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; // X86-64 Instruction templates... // -class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, REX_W; +class RI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii32<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern> - : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W { + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W { let Pattern = pattern; let CodeSize = 3; } class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : SSI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : SDI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : PDI<o, F, outs, ins, asm, pattern>, REX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : VPDI<o, F, outs, ins, asm, pattern>, VEX_W; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W; // MMX Instruction templates // @@ -574,23 +595,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. 
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>; class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>; class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>; class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern> - : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index cd13bc4..4f9f089 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -1,10 +1,10 @@ -//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// +//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file provides pattern fragments useful for SIMD instructions. 
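Stepping back to the X86InstrFormats.td hunks above: the mechanical change is that every instruction template grows an InstrItinClass parameter defaulting to IIC_DEFAULT. A minimal C++ analogue of that pattern, with illustrative names (only IIC_DEFAULT comes from the diff; IIC_SSE_ALU is hypothetical):

#include <string>
#include <utility>

enum InstrItinClass { IIC_DEFAULT, IIC_SSE_ALU };  // IIC_SSE_ALU is made up

struct InstTemplate {
  std::string AsmStr;
  InstrItinClass Itinerary;
  // Defaulted parameter: existing definitions keep compiling unchanged,
  // while new ones may pass a real itinerary for scheduling models.
  explicit InstTemplate(std::string Asm, InstrItinClass Itin = IIC_DEFAULT)
      : AsmStr(std::move(Asm)), Itinerary(Itin) {}
};

int main() {
  InstTemplate Old("paddd");              // picks up IIC_DEFAULT
  InstTemplate New("paddd", IIC_SSE_ALU); // opts in explicitly
  return (Old.Itinerary == IIC_DEFAULT && New.Itinerary == IIC_SSE_ALU) ? 0 : 1;
}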
@@ -48,7 +48,7 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -71,20 +71,30 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS", SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; + def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; -def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; -def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; -def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; -def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; -def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; -def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; -def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; +def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; +def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; +def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; +def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; + +def X86vshl : SDNode<"X86ISD::VSHL", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVec<2>]>>; +def X86vsrl : SDNode<"X86ISD::VSRL", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVec<2>]>>; +def X86vsra : SDNode<"X86ISD::VSRA", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVec<2>]>>; + +def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; +def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; +def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, @@ -92,6 +102,17 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; +def X86vpcom : SDNode<"X86ISD::VPCOM", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; +def X86vpcomu : SDNode<"X86ISD::VPCOMU", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; + +def X86pmuludq : SDNode<"X86ISD::PMULUDQ", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<1,2>]>>; + // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get // translated into one of the target nodes below during lowering. // Note: this is a work in progress... 
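The hunk above splits the old shift nodes into register-count forms (VSHL/VSRL/VSRA, whose count arrives in a second vector operand) and immediate forms (VSHLI/VSRLI/VSRAI). A rough scalar model of the distinction, assuming PSLLD-style semantics (count read from the low 64 bits of the count vector; counts at or above the element width zero the lane):

#include <cstdint>

// Immediate form: every lane shifted by the same encoded immediate.
void vshli_v4i32(uint32_t v[4], unsigned imm) {
  for (int i = 0; i < 4; ++i)
    v[i] = imm < 32 ? v[i] << imm : 0;
}

// Register form: the count comes from another vector's low quadword.
void vshl_v4i32(uint32_t v[4], const uint64_t count[2]) {
  uint64_t c = count[0];
  for (int i = 0; i < 4; ++i)
    v[i] = c < 32 ? uint32_t(v[i] << c) : 0;
}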
@@ -112,8 +133,7 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; -def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; +def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; @@ -125,7 +145,6 @@ def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; -def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; @@ -171,15 +190,15 @@ def sdmem : Operand<v2f64> { //===----------------------------------------------------------------------===// // 128-bit load pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; // 256-bit load pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; -def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; // Like 'store', but always requires 128-bit vector alignment. 
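The NOTE above is the key to the deleted fragments: once every 128-bit integer vector load is promoted to v2i64 (and every 256-bit one to v4i64), the narrower-element fragments match nothing the promoted one cannot, because the lane types are just reinterpretations of the same loaded bytes. A small sketch of that equivalence:

#include <cstdint>
#include <cstring>

// One 16-byte load; the v2i64 view is what ISel now matches, and the
// v4i32 view is the same data seen through a bitcast.
void load_views(const unsigned char bytes[16],
                uint64_t v2i64[2], uint32_t v4i32[4]) {
  std::memcpy(v2i64, bytes, 16);
  std::memcpy(v4i32, bytes, 16);
}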
@@ -210,22 +229,20 @@ def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; // 128-bit aligned load pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), - (v4i32 (alignedload node:$ptr))>; def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; // 256-bit aligned load pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def alignedloadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (alignedload256 node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (alignedload256 node:$ptr))>; -def alignedloadv8i32 : PatFrag<(ops node:$ptr), - (v8i32 (alignedload256 node:$ptr))>; def alignedloadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (alignedload256 node:$ptr))>; @@ -244,20 +261,16 @@ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; // 128-bit memop pattern fragments +// NOTE: all 128-bit integer vector loads are promoted to v2i64 def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; -def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; -def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; -def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; // 256-bit memop pattern fragments +// NOTE: all 256-bit integer vector loads are promoted to v4i64 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; -def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; -def memopv16i16 : PatFrag<(ops node:$ptr), (v16i16 (memop node:$ptr))>; -def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. @@ -329,24 +342,6 @@ def BYTE_imm : SDNodeXForm<imm, [{ return getI32Imm(N->getZExtValue() >> 3); }]>; -// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, -// SHUFP* etc. imm. -def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to -// PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); -}]>; - -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to -// PSHUFLW imm. -def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); -}]>; - // EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index // to VEXTRACTF128 imm. 
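For the 'memop' fragments kept above, the usual predicate (sketched here with illustrative names; the exact subtarget hook is an assumption) is that a vector load may be folded into an op's memory operand only when it meets the natural alignment or the target tolerates unaligned vector memory:

// Returns true when a vector load may be folded into an instruction's
// memory operand: either it meets the natural alignment (16 bytes for
// xmm, 32 for ymm) or the subtarget allows unaligned vector accesses.
bool isFoldableVectorLoad(unsigned loadAlign, unsigned vecBytes,
                          bool allowsUnalignedVecMem) {
  return allowsUnalignedVecMem || loadAlign >= vecBytes;
}

The TB_ALIGN_16 and TB_ALIGN_32 flags in the X86InstrInfo.cpp folding tables further down encode the same per-opcode alignment requirement.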
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ @@ -359,72 +354,6 @@ def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{ return getI8Imm(X86::getInsertVINSERTF128Immediate(N)); }]>; -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; - -def movddup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlhps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); -}]>; - -def unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); -}]>; - -def pshufd : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def shufp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshufhw_imm>; - -def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshuflw_imm>; - def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index), (extract_subvector node:$bigvec, node:$index), [{ diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7d1b9a1..5a479f0 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===// +//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -274,7 +274,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, - { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD }, { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, @@ -351,6 +350,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { 
X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, @@ -361,6 +361,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, // AVX 256-bit foldable instructions + { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, @@ -513,6 +514,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 }, { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 }, { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 }, + { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 }, + { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 }, { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, @@ -529,16 +532,26 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX 256-bit foldable instructions { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 }, + { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, + { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 }, + { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 }, // AVX2 foldable instructions - { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_16 }, - { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_16 }, - { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_16 }, - { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_16 }, - { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_16 }, - { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_16 } + { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 }, + { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 }, + { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 }, + { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 }, + { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 }, + { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 }, + { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 }, + { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -575,6 +588,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, + { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 }, + { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, + { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, + { 
X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, @@ -692,6 +709,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, + { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 }, { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, @@ -700,12 +718,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, - { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, - { X86::PHADDWrr128, X86::PHADDWrm128, TB_ALIGN_16 }, + { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, + { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, - { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, - { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, @@ -722,10 +740,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, - { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 }, - { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, - { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, - { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, + { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, + { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 }, + { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 }, + { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 }, { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, @@ -809,6 +827,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 }, { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 }, { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 }, + { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 }, + { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 }, + { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 }, + { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 }, { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 }, { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 }, { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, @@ -871,6 +893,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 }, { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 }, + { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 }, { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 }, @@ -879,12 +902,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, - { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, + { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 }, { 
X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, - { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, - { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, + { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 }, + { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 }, { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, - { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 }, + { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, + { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 }, { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, @@ -901,10 +926,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 }, - { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 }, - { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 }, - { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 }, + { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 }, + { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 }, + { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 }, { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, @@ -939,90 +964,146 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }, + // AVX 256-bit foldable instructions + { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 }, + { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 }, + { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 }, + { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 }, + { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 }, + { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 }, + { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 }, + { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 }, + { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 }, + { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 }, + { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 }, + { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 }, + { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 }, + { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 }, + { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 }, + { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 }, + { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 }, + { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 }, + { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 }, + { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 }, + { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 }, + { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 }, + { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 }, + { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 }, + { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 }, + { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 }, + { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 }, + { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 }, + { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 }, + { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 }, + { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 }, + { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 }, + { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 }, + { X86::VPERM2F128rr, 
X86::VPERM2F128rm, TB_ALIGN_32 }, + { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 }, + { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 }, + { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 }, + { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 }, + { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 }, + { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 }, + { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 }, + { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 }, + { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 }, + { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 }, + { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 }, + { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 }, // AVX2 foldable instructions - { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_16 }, - { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_16 }, - { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_16 }, - { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_16 }, - { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_16 }, - { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_16 }, - { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_16 }, - { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_16 }, - { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_16 }, - { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_16 }, - { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_16 }, - { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_16 }, - { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_16 }, - { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_16 }, - { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_16 }, - { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_16 }, - { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_16 }, - { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_16 }, - { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_16 }, - { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_16 }, - { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_16 }, - { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_16 }, - { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_16 }, - { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_16 }, - { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_16 }, - { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_16 }, - { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_16 }, - { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_16 }, - { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_16 }, - { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_16 }, - { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_16 }, - { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_16 }, - { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_16 }, - { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_16 }, - { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_16 }, - { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_16 }, - { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_16 }, - { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_16 }, - { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_16 }, - { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_16 }, - { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_16 }, - { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_16 }, - { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_16 }, - { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_16 }, - { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_16 }, - { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_16 }, - { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_16 }, - { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_16 }, - { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_16 }, - { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_16 }, + { X86::VINSERTI128rr, 
X86::VINSERTI128rm, TB_ALIGN_16 }, + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 }, + { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 }, + { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 }, + { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 }, + { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 }, + { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 }, + { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 }, + { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 }, + { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 }, + { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 }, + { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 }, + { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 }, + { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 }, + { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 }, + { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, + { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 }, + { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 }, + { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 }, + { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 }, + { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 }, + { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 }, + { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 }, { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 }, - { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_16 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 }, { X86::VPSLLVQrr, 
X86::VPSLLVQrm, TB_ALIGN_16 }, - { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_16 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 }, { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 }, { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 }, { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 }, - { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_16 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 }, { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 }, { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 }, { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 }, { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 }, - { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_16 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 }, { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 }, - { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_16 }, - { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_16 }, - { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_16 }, - { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_16 }, - { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_16 }, - { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_16 }, - { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_16 }, - { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_16 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 }, { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 }, - { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_16 }, - { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_16 }, - { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_16 }, - { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_16 }, - { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_16 }, - { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, + { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, // FIXME: add AVX 256-bit foldable instructions }; @@ -1082,7 +1163,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: llvm_unreachable(0); - break; case X86::MOVSX16rr8: case X86::MOVZX16rr8: case X86::MOVSX32rr8: @@ -1125,7 +1205,8 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, static bool isFrameLoadOpcode(int Opcode) { switch (Opcode) { - default: break; + default: + return false; case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: @@ -1147,9 +1228,7 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: return true; - break; } - return false; } static bool isFrameStoreOpcode(int Opcode) { @@ -1339,6 +1418,8 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, bool SeenDef = false; for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { MachineOperand &MO = Iter->getOperand(j); + if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS)) + SeenDef = true; if (!MO.isReg()) continue; if (MO.getReg() == X86::EFLAGS) { @@ -1383,6 +1464,10 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, bool SawKill = false; for (unsigned j = 0, e = 
Iter->getNumOperands(); j != e; ++j) { MachineOperand &MO = Iter->getOperand(j); + // A register mask may clobber EFLAGS, but we should still look for a + // live EFLAGS def. + if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS)) + SawKill = true; if (MO.isReg() && MO.getReg() == X86::EFLAGS) { if (MO.isDef()) return MO.isDead(); if (MO.isKill()) SawKill = true; @@ -1493,7 +1578,6 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, switch (MIOpc) { default: llvm_unreachable(0); - break; case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) @@ -1605,6 +1689,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(B, getKillRegState(isKill)).addImm(M); break; } + case X86::SHUFPDrri: { + assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); + if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; + + unsigned B = MI->getOperand(1).getReg(); + unsigned C = MI->getOperand(2).getReg(); + if (B != C) return 0; + unsigned A = MI->getOperand(0).getReg(); + unsigned M = MI->getOperand(3).getImm(); + + // Convert to PSHUFD mask. + M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44; + + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) + .addReg(A, RegState::Define | getDeadRegState(isDead)) + .addReg(B, getKillRegState(isKill)).addImm(M); + break; + } case X86::SHL64ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses @@ -1733,7 +1835,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32rr_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc; - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { Opc = X86::LEA64r; RC = X86::GR64_NOSPRegisterClass; @@ -2908,6 +3010,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: case X86::AVX2_SETALLONES: + case X86::AVX2_SET0: Alignment = 32; break; case X86::V_SET0: @@ -2952,6 +3055,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PDY: case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: + case X86::AVX2_SET0: case X86::FsFLD0SD: case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 
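The SHUFPDrri case added to convertToThreeAddress above turns a two-address SHUFPD whose two sources are the same register into a PSHUFD, remapping the two select bits of the SHUFPD immediate onto the four 2-bit dword selectors of the PSHUFD immediate. The following self-contained C++ sketch (illustration only, not LLVM code) reproduces that expression and verifies it for all four immediates; for example, imm 1 (low half from src[1], high half from src[0]) becomes the PSHUFD dword order {2,3,0,1}, i.e. 0x4E.

#include <cassert>
#include <cstdio>

// Same expression as in the patch: bit 0 of M selects the source double for
// the low half of the result, bit 1 for the high half; double d occupies the
// dword pair (2d, 2d+1).
static unsigned ShufpdToPshufdImm(unsigned M) {
  return ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6) | 0x44;
}

int main() {
  for (unsigned M = 0; M != 4; ++M) {
    unsigned Imm = ShufpdToPshufdImm(M);
    // Decode the PSHUFD immediate as four 2-bit dword selectors and check
    // each against the double that SHUFPD would have picked.
    for (unsigned i = 0; i != 4; ++i) {
      unsigned Dword = (Imm >> (2 * i)) & 3;
      unsigned Dbl = (i < 2) ? (M & 1) : ((M >> 1) & 1);
      assert(Dword == 2 * Dbl + (i & 1));
    }
    printf("shufpd imm %u -> pshufd imm 0x%02X\n", M, Imm);
  }
  return 0;
}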
@@ -2985,6 +3089,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); + else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0) + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ee488d8..d065d2d 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -1,4 +1,4 @@ -//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===// +//===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0bc3afa..f585b47 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1,4 +1,4 @@ -//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===// +//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -99,17 +99,16 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; + def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>; -def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER, [SDNPHasChain]>; -def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE, - [SDNPHasChain]>; def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER, [SDNPHasChain]>; def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER, @@ -241,6 +240,9 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL, + [SDNPHasChain, SDNPOutGlue]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -251,10 +253,31 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. 
// -def X86MemAsmOperand : AsmOperandClass { - let Name = "Mem"; - let SuperClasses = []; +def X86MemAsmOperand : AsmOperandClass { + let Name = "Mem"; let PredicateMethod = "isMem"; +} +def X86Mem8AsmOperand : AsmOperandClass { + let Name = "Mem8"; let PredicateMethod = "isMem8"; +} +def X86Mem16AsmOperand : AsmOperandClass { + let Name = "Mem16"; let PredicateMethod = "isMem16"; +} +def X86Mem32AsmOperand : AsmOperandClass { + let Name = "Mem32"; let PredicateMethod = "isMem32"; +} +def X86Mem64AsmOperand : AsmOperandClass { + let Name = "Mem64"; let PredicateMethod = "isMem64"; +} +def X86Mem80AsmOperand : AsmOperandClass { + let Name = "Mem80"; let PredicateMethod = "isMem80"; } +def X86Mem128AsmOperand : AsmOperandClass { + let Name = "Mem128"; let PredicateMethod = "isMem128"; +} +def X86Mem256AsmOperand : AsmOperandClass { + let Name = "Mem256"; let PredicateMethod = "isMem256"; +} + def X86AbsMemAsmOperand : AsmOperandClass { let Name = "AbsMem"; let SuperClasses = [X86MemAsmOperand]; @@ -271,17 +294,28 @@ def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; def opaque512mem : X86MemOperand<"printopaquemem">; -def i8mem : X86MemOperand<"printi8mem">; -def i16mem : X86MemOperand<"printi16mem">; -def i32mem : X86MemOperand<"printi32mem">; -def i64mem : X86MemOperand<"printi64mem">; -def i128mem : X86MemOperand<"printi128mem">; -def i256mem : X86MemOperand<"printi256mem">; -def f32mem : X86MemOperand<"printf32mem">; -def f64mem : X86MemOperand<"printf64mem">; -def f80mem : X86MemOperand<"printf80mem">; -def f128mem : X86MemOperand<"printf128mem">; -def f256mem : X86MemOperand<"printf256mem">; +def i8mem : X86MemOperand<"printi8mem"> { + let ParserMatchClass = X86Mem8AsmOperand; } +def i16mem : X86MemOperand<"printi16mem"> { + let ParserMatchClass = X86Mem16AsmOperand; } +def i32mem : X86MemOperand<"printi32mem"> { + let ParserMatchClass = X86Mem32AsmOperand; } +def i64mem : X86MemOperand<"printi64mem"> { + let ParserMatchClass = X86Mem64AsmOperand; } +def i128mem : X86MemOperand<"printi128mem"> { + let ParserMatchClass = X86Mem128AsmOperand; } +def i256mem : X86MemOperand<"printi256mem"> { + let ParserMatchClass = X86Mem256AsmOperand; } +def f32mem : X86MemOperand<"printf32mem"> { + let ParserMatchClass = X86Mem32AsmOperand; } +def f64mem : X86MemOperand<"printf64mem"> { + let ParserMatchClass = X86Mem64AsmOperand; } +def f80mem : X86MemOperand<"printf80mem"> { + let ParserMatchClass = X86Mem80AsmOperand; } +def f128mem : X86MemOperand<"printf128mem"> { + let ParserMatchClass = X86Mem128AsmOperand; } +def f256mem : X86MemOperand<"printf256mem">{ + let ParserMatchClass = X86Mem256AsmOperand; } } // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of @@ -289,7 +323,7 @@ def f256mem : X86MemOperand<"printf256mem">; def i8mem_NOREX : Operand<i64> { let PrintMethod = "printi8mem"; let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86Mem8AsmOperand; let OperandType = "OPERAND_MEMORY"; } @@ -303,7 +337,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<2>; def i32mem_TC : Operand<i32> { let PrintMethod = "printi32mem"; let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86Mem32AsmOperand; let OperandType = "OPERAND_MEMORY"; } @@ -314,7 +348,7 @@ def i64mem_TC : Operand<i64> { let PrintMethod = "printi64mem"; let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, 
ptr_rc_tailcall, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86Mem64AsmOperand; let OperandType = "OPERAND_MEMORY"; } @@ -470,11 +504,8 @@ def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; - def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; -def HasXMM : Predicate<"Subtarget->hasXMM()">; -def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; @@ -489,15 +520,14 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; -def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; -def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; +def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit">; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; -def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -1513,6 +1543,7 @@ include "X86InstrMMX.td" include "X86Instr3DNow.td" include "X86InstrVMX.td" +include "X86InstrSVM.td" // System instructions. include "X86InstrSystem.td" @@ -1601,6 +1632,8 @@ def : MnemonicAlias<"fcmovna", "fcmovbe">; def : MnemonicAlias<"fcmovae", "fcmovnb">; def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll">; +def : MnemonicAlias<"fistpq", "fistpll">; +def : MnemonicAlias<"fisttpq", "fisttpll">; def : MnemonicAlias<"fldcww", "fldcw">; def : MnemonicAlias<"fnstcww", "fnstcw">; def : MnemonicAlias<"fnstsww", "fnstsw">; @@ -1822,20 +1855,20 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; // errors, since its encoding is the most compact. 
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; -// shld/shrd op,op -> shld op, op, 1 -def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>; -def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>; -def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>; -def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>; -def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>; -def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>; - -def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>; -def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>; -def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>; -def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>; -def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>; -def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>; +// shld/shrd op,op -> shld op, op, CL +def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>; +def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>; + +def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>; +def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>; +def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>; +def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>; /* FIXME: This is disabled because the asm matcher is currently incapable of * matching a fixed immediate like $1. 
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index b2d9fca..63f96b6 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -1,4 +1,4 @@ -//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===// +//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -105,19 +105,23 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], d>; + [(set DstRC:$dst, (Int SrcRC:$src))], + IIC_DEFAULT, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], + IIC_DEFAULT, d>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], + IIC_DEFAULT, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), asm, - [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], + IIC_DEFAULT, d>; } //===----------------------------------------------------------------------===// @@ -175,25 +179,25 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), addr:$dst)]>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "movdq2q\t{$src, $dst|$dst, $src}", +def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "movq2dq\t{$src, $dst|$dst, $src}", +def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector (i64 (bitconvert (x86mmx VR64:$src))))))]>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), - "movq2dq\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), - "movdq2q\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), + (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 345f606..c6d1d19 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1,4 +1,4 @@ -//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -13,6 +13,126 @@ // 
//===----------------------------------------------------------------------===// +class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { + InstrItinClass rr = arg_rr; + InstrItinClass rm = arg_rm; +} + +class SizeItins<OpndItins arg_s, OpndItins arg_d> { + OpndItins s = arg_s; + OpndItins d = arg_d; +} + + +class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, + InstrItinClass arg_ri> { + InstrItinClass rr = arg_rr; + InstrItinClass rm = arg_rm; + InstrItinClass ri = arg_ri; +} + + +// scalar +def SSE_ALU_F32S : OpndItins< + IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM +>; + +def SSE_ALU_F64S : OpndItins< + IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM +>; + +def SSE_ALU_ITINS_S : SizeItins< + SSE_ALU_F32S, SSE_ALU_F64S +>; + +def SSE_MUL_F32S : OpndItins< + IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM +>; + +def SSE_MUL_F64S : OpndItins< + IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM +>; + +def SSE_MUL_ITINS_S : SizeItins< + SSE_MUL_F32S, SSE_MUL_F64S +>; + +def SSE_DIV_F32S : OpndItins< + IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM +>; + +def SSE_DIV_F64S : OpndItins< + IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM +>; + +def SSE_DIV_ITINS_S : SizeItins< + SSE_DIV_F32S, SSE_DIV_F64S +>; + +// parallel +def SSE_ALU_F32P : OpndItins< + IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM +>; + +def SSE_ALU_F64P : OpndItins< + IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM +>; + +def SSE_ALU_ITINS_P : SizeItins< + SSE_ALU_F32P, SSE_ALU_F64P +>; + +def SSE_MUL_F32P : OpndItins< + IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM +>; + +def SSE_MUL_F64P : OpndItins< + IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM +>; + +def SSE_MUL_ITINS_P : SizeItins< + SSE_MUL_F32P, SSE_MUL_F64P +>; + +def SSE_DIV_F32P : OpndItins< + IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM +>; + +def SSE_DIV_F64P : OpndItins< + IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM +>; + +def SSE_DIV_ITINS_P : SizeItins< + SSE_DIV_F32P, SSE_DIV_F64P +>; + +def SSE_BIT_ITINS_P : OpndItins< + IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM +>; + +def SSE_INTALU_ITINS_P : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; + +def SSE_INTALUQ_ITINS_P : OpndItins< + IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM +>; + +def SSE_INTMUL_ITINS_P : OpndItins< + IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM +>; + +def SSE_INTSHIFT_ITINS_P : ShiftOpndItins< + IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI +>; + +def SSE_MOVA_ITINS : OpndItins< + IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM +>; + +def SSE_MOVU_ITINS : OpndItins< + IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM +>; //===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes @@ -21,25 +141,27 @@ /// sse12_fp_scalar - SSE 1 & 2 scalar instructions class multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, + OpndItins itins, bit Is2Addr = 1> { let isCommutable = 1 in { def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; } /// 
sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, string asm, string SSEVer, string FPSizeStr, Operand memopr, ComplexPattern mem_cpat, + OpndItins itins, bit Is2Addr = 1> { def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, @@ -47,33 +169,34 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))]>; + RC:$src1, RC:$src2))], itins.rr>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))]>; + RC:$src1, mem_cpat:$src2))], itins.rm>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, RegisterClass RC, ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, - Domain d, bit Is2Addr = 1> { + Domain d, OpndItins itins, bit Is2Addr = 1> { let isCommutable = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>; + [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], + itins.rm, d>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -87,33 +210,33 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, d>; + pat_rr, IIC_DEFAULT, d>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, d>; + pat_rm, IIC_DEFAULT, d>; } /// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, string asm, string SSEVer, string FPSizeStr, X86MemOperand x86memop, PatFrag mem_frag, - Domain d, bit Is2Addr = 1> { + Domain d, OpndItins itins, bit Is2Addr = 1> { def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], d>; + RC:$src1, RC:$src2))], IIC_DEFAULT, d>; def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 
[(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, (mem_frag addr:$src2)))], d>; + RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>; } //===----------------------------------------------------------------------===// @@ -171,7 +294,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)), // Bitcasts between 128-bit vector types. Return the original type since // no instruction is needed for the conversion -let Predicates = [HasXMMInt] in { +let Predicates = [HasSSE2] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; @@ -244,9 +367,9 @@ let Predicates = [HasAVX] in { let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1 in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>; + [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>; + [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>; } //===----------------------------------------------------------------------===// @@ -279,16 +402,35 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // JIT implementation, it does not expand the instructions below like // X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, Predicates = [HasAVX] in { + isCodeGenOnly = 1 in { +let Predicates = [HasAVX] in { def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V; def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V; } +let Predicates = [HasAVX2], neverHasSideEffects = 1 in +def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "", + []>, VEX_4V; +} +let Predicates = [HasAVX2], AddedComplexity = 5 in { + def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>; + def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>; + def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>; + def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>; +} // AVX has no support for 256-bit integer instructions, but since the 128-bit // VPXOR instruction writes zero to its upper part, it's safe to build zeros. +def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v32i8 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; + +def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>; +def : Pat<(bc_v16i16 (v8f32 immAllZerosV)), + (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>; + def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; def : Pat<(bc_v8i32 (v8f32 immAllZerosV)), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>; @@ -304,11 +446,11 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // X86MCInstLower does. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX] in def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; @@ -325,22 +467,25 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. //===----------------------------------------------------------------------===// -class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> : +class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, - [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>; + [(set VR128:$dst, (vt (OpNode VR128:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>; // Loading from memory automatically zeroing upper bits. class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, PatFrag mem_pat, string OpcodeStr> : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))]>; + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>; // AVX -def VMOVSSrr : sse12_move_rr<FR32, v4f32, +def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V, VEX_LIG; -def VMOVSDrr : sse12_move_rr<FR64, v2f64, +def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V, VEX_LIG; @@ -348,11 +493,13 @@ def VMOVSDrr : sse12_move_rr<FR64, v2f64, let isCodeGenOnly = 1 in { def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, XS, VEX_4V, VEX_LIG; def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_MOV_S_RR>, XD, VEX_4V, VEX_LIG; } @@ -366,26 +513,30 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG; + [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + XS, VEX, VEX_LIG; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG; + [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + XD, VEX, VEX_LIG; // SSE1 & 2 let Constraints = "$src1 = $dst" in { - def MOVSSrr : sse12_move_rr<FR32, v4f32, + def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, "movss\t{$src2, $dst|$dst, $src2}">, XS; - def MOVSDrr : sse12_move_rr<FR64, v2f64, + def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, "movsd\t{$src2, $dst|$dst, $src2}">, XD; // For the disassembler let isCodeGenOnly = 1 in { def MOVSSrr_REV : SI<0x11, 
MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", []>, XS; + "movss\t{$src2, $dst|$dst, $src2}", [], + IIC_SSE_MOV_S_RR>, XS; def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", []>, XD; + "movsd\t{$src2, $dst|$dst, $src2}", [], + IIC_SSE_MOV_S_RR>, XD; } } @@ -398,157 +549,14 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>; + [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>; + [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; // Patterns -let Predicates = [HasSSE1] in { - let AddedComplexity = 15 in { - // Extract the low 32-bit value from one vector and insert it into another. - def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; - def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; - - // Move scalar to XMM zero-extended, zeroing a VR128 then do a - // MOVSS to the lower bits. - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; - def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; - def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; - } - - let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. - def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; - } - - // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - - // Shuffle with MOVSS - def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), - (MOVSSrr VR128:$src1, FR32:$src2)>; - def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; - def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; -} - -let Predicates = [HasSSE2] in { - let AddedComplexity = 15 in { - // Extract the low 64-bit value from one vector and insert it into another. 
- def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - - // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd - def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - - // Move scalar to XMM zero-extended, zeroing a VR128 then do a - // MOVSD to the lower bits. - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; - } - - let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. - def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; - } - - // Extract and store. - def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - - // Shuffle with MOVSD - def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), - (MOVSDrr VR128:$src1, FR64:$src2)>; - def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; - def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; - - // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem - // is during lowering, where it's not possible to recognize the fold cause - // it has two uses through a bitcast. One use disappears at isel time and the - // fold opportunity reappears. - def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; - def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; - def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; - def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; -} - let Predicates = [HasAVX] in { let AddedComplexity = 15 in { - // Extract the low 32-bit value from one vector and insert it into another. 
- def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), - (VMOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; - def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (VMOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; - - // Extract the low 64-bit value from one vector and insert it into another. - def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; - - // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd - def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>; - // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVS{S,D} to the lower bits. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), @@ -628,7 +636,12 @@ let Predicates = [HasAVX] in { (VMOVSDrr (v2f64 (V_SET0)), (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; - // Extract and store. + def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; + +// Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSSmr addr:$dst, @@ -639,8 +652,6 @@ let Predicates = [HasAVX] in { (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Shuffle with VMOVSS - def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), - (VMOVSSrr VR128:$src1, FR32:$src2)>; def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4i32 VR128:$src1), (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; @@ -649,18 +660,16 @@ let Predicates = [HasAVX] in { (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; // 256-bit variants - def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; - def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; // Shuffle with VMOVSD - def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), - (VMOVSDrr VR128:$src1, FR64:$src2)>; def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), (VMOVSDrr (v2i64 VR128:$src1), (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; @@ -703,6 +712,101 @@ let Predicates = [HasAVX] in { sub_sd))>; } +let Predicates = [HasSSE1] in { + let AddedComplexity = 15 in { + // Move scalar to XMM zero-extended, zeroing a VR128 then do a + // MOVSS to the lower bits. + def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; + def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + } + + let AddedComplexity = 20 in { + // MOVSSrm zeros the high parts of the register; represent this + // with SUBREG_TO_REG. 
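// (Illustrative sketch, not taken from this patch: the SUBREG_TO_REG idiom
// used by the patterns below takes three operands -- an immediate asserting
// the value of the bits not covered by the subregister, the value being
// inserted, and the subregister index. A hypothetical pattern of the same
// shape, with a made-up operand name $mem, would be
//
//   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector
//                                          (loadf64 addr:$mem))))),
//             (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$mem), sub_sd)>;
//
// The (i64 0) records that the lanes above sub_sd are already zero after the
// scalar load, so instruction selection emits no extra zeroing instruction.)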
+  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+  }
+
+  // Extract and store.
+  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+                   addr:$dst),
+            (MOVSSmr addr:$dst,
+                     (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+  // Shuffle with MOVSS
+  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+            (MOVSSrr (v4i32 VR128:$src1),
+                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+            (MOVSSrr (v4f32 VR128:$src1),
+                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
+
+let Predicates = [HasSSE2] in {
+  let AddedComplexity = 15 in {
+  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+  // MOVSD to the lower bits.
+  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+  }
+
+  let AddedComplexity = 20 in {
+  // MOVSDrm zeros the high parts of the register; represent this
+  // with SUBREG_TO_REG.
+  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+  def : Pat<(v2f64 (X86vzload addr:$src)),
+            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+  }
+
+  // Extract and store.
+  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+                   addr:$dst),
+            (MOVSDmr addr:$dst,
+                     (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+  // Shuffle with MOVSD
+  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+            (MOVSDrr (v2i64 VR128:$src1),
+                     (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+            (MOVSDrr (v2f64 VR128:$src1),
+                     (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+  // FIXME: Instead of an X86Movlps there should be an X86Movsd here, the
+  // problem is during lowering, where it's not possible to recognize the fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
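// (To make the FIXME concrete -- a rough sketch of the DAG shape, with
// hypothetical value names t0/t1: what lowering sees is approximately
//
//   t0 = X86Movlps v1, v2
//   t1 = bitcast t0 to v2f64
//   ...uses of both t0 and t1...
//
// With the second use reaching t0 only through the bitcast, lowering cannot
// safely rewrite t0 into an X86Movsd. By instruction selection time the
// bitcast use has been folded away, so the X86Movlpd/X86Movlps nodes are
// matched to MOVSDrr by the patterns that follow.)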
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; + def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions //===----------------------------------------------------------------------===// @@ -710,93 +814,122 @@ let Predicates = [HasAVX] in { multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag, string asm, Domain d, + OpndItins itins, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, TB, VEX; + "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, + TB, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, TB, OpSize, VEX; + "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, + TB, OpSize, VEX; defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, TB, VEX; + "movups", SSEPackedSingle, SSE_MOVU_ITINS>, + TB, VEX; defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; + "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, + TB, OpSize, VEX; defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, - "movaps", SSEPackedSingle>, TB, VEX; + "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, + TB, VEX; defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, - "movapd", SSEPackedDouble>, TB, OpSize, VEX; + "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, + TB, OpSize, VEX; defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, - "movups", SSEPackedSingle>, TB, VEX; + "movups", SSEPackedSingle, SSE_MOVU_ITINS>, + TB, VEX; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, - "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX; + "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, + TB, OpSize, VEX; defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, - "movaps", SSEPackedSingle>, TB; + "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, + TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, - "movapd", SSEPackedDouble>, TB, OpSize; + "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, + TB, OpSize; defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, - "movups", SSEPackedSingle>, TB; + "movups", SSEPackedSingle, SSE_MOVU_ITINS>, + TB; defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, - "movupd", SSEPackedDouble, 0>, TB, OpSize; + "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, + TB, OpSize; def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins 
f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; + [(alignedstore (v4f32 VR128:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>, VEX; def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX; + [(alignedstore (v2f64 VR128:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>, VEX; def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", - [(store (v4f32 VR128:$src), addr:$dst)]>, VEX; + [(store (v4f32 VR128:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>, VEX; def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", - [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; + [(store (v2f64 VR128:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>, VEX; def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>, VEX; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>, VEX; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", - [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; + [(store (v8f32 VR256:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>, VEX; def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", - [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; + [(store (v4f64 VR256:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>, VEX; // For disassembler let isCodeGenOnly = 1 in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + "movaps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + "movapd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movups\t{$src, $dst|$dst, $src}", []>, VEX; + "movups\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>, VEX; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", []>, VEX; + "movupd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>, VEX; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + "movaps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + "movapd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movups\t{$src, $dst|$dst, $src}", []>, VEX; + "movups\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>, VEX; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), - "movupd\t{$src, $dst|$dst, $src}", []>, VEX; + "movupd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>, VEX; } let Predicates = [HasAVX] in { @@ -815,37 +948,42 @@ def 
: Pat<(v4f64 (X86vzmovl } -def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; - -def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; + [(alignedstore (v4f32 VR128:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>; def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; + [(alignedstore (v2f64 VR128:$src), addr:$dst)], + IIC_SSE_MOVA_P_MR>; def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", - [(store (v4f32 VR128:$src), addr:$dst)]>; + [(store (v4f32 VR128:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>; def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", - [(store (v2f64 VR128:$src), addr:$dst)]>; + [(store (v2f64 VR128:$src), addr:$dst)], + IIC_SSE_MOVU_P_MR>; // For disassembler let isCodeGenOnly = 1 in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; + "movaps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>; def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; + "movapd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>; def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movups\t{$src, $dst|$dst, $src}", []>; + "movups\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>; def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", []>; + "movupd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVU_P_RR>; } let Predicates = [HasAVX] in { @@ -862,44 +1000,9 @@ let Predicates = [HasSSE2] in def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), (MOVUPDmr addr:$dst, VR128:$src)>; -// Use movaps / movups for SSE integer load / store (one byte shorter). -// The instructions selected below are then converted to MOVDQA/MOVDQU -// during the SSE domain pass. -let Predicates = [HasSSE1] in { - def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; - def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; - def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - - def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -} - // Use vmovaps/vmovups for AVX integer load/store. 
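// (Encoding note, an illustrative aside: the "one byte shorter" claim in the
// SSE block further down comes from the legacy encodings -- MOVAPS is
// 0F 28 /r and MOVUPS is 0F 10 /r, while the integer-domain MOVDQA/MOVDQU
// need an extra prefix byte, 66 0F 6F /r and F3 0F 6F /r respectively. Under
// VEX the lengths are the same, but selecting the FP-domain moves uniformly
// stays safe because the SSE execution-domain pass, as the comment below
// says, can still rewrite them to MOVDQA/MOVDQU when the surrounding code is
// in the integer domain.)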
let Predicates = [HasAVX] in { // 128-bit load/store - def : Pat<(alignedloadv4i32 addr:$src), - (VMOVAPSrm addr:$src)>; - def : Pat<(loadv4i32 addr:$src), - (VMOVUPSrm addr:$src)>; def : Pat<(alignedloadv2i64 addr:$src), (VMOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), @@ -927,10 +1030,6 @@ let Predicates = [HasAVX] in { (VMOVAPSYrm addr:$src)>; def : Pat<(loadv4i64 addr:$src), (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedloadv8i32 addr:$src), - (VMOVAPSYrm addr:$src)>; - def : Pat<(loadv8i32 addr:$src), - (VMOVUPSYrm addr:$src)>; def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst), (VMOVAPSYmr addr:$dst, VR256:$src)>; def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst), @@ -949,36 +1048,71 @@ let Predicates = [HasAVX] in { (VMOVUPSYmr addr:$dst, VR256:$src)>; } +// Use movaps / movups for SSE integer load / store (one byte shorter). +// The instructions selected below are then converted to MOVDQA/MOVDQU +// during the SSE domain pass. +let Predicates = [HasSSE1] in { + def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; + def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; + + def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; + def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +} + // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! let neverHasSideEffects = 1 in { -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>, VEX; + "movaps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>, VEX; + "movapd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>, VEX; +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVA_P_RR>; } // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! 
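// (A sketch of what the "Set encoding to pseudo" FIXME asks for, using the
// hypothetical name FsMOVAPS_PSEUDO: rather than committing these register
// class crossing copies and loads to a concrete movaps encoding up front,
// they could be declared as pseudos in the style of V_SET0,
//
//   def FsMOVAPS_PSEUDO : I<0, Pseudo, (outs FR32:$dst), (ins FR32:$src),
//                           "", []>;
//
// and expanded to movaps or vmovaps later. The defs below instead use the
// real 0x28 opcode directly.)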
let canFoldAsLoad = 1, isReMaterializable = 1 in { -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX; + [(set FR32:$dst, (alignedloadfsf32 addr:$src))], + IIC_SSE_MOVA_P_RM>, VEX; def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX; + [(set FR64:$dst, (alignedloadfsf64 addr:$src))], + IIC_SSE_MOVA_P_RM>, VEX; } +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))], + IIC_SSE_MOVA_P_RM>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))], + IIC_SSE_MOVA_P_RM>; } //===----------------------------------------------------------------------===// @@ -986,94 +1120,68 @@ let isCodeGenOnly = 1 in { //===----------------------------------------------------------------------===// multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, - PatFrag mov_frag, string base_opc, - string asm_opr> { + SDNode psnode, SDNode pdnode, string base_opc, + string asm_opr, InstrItinClass itin> { def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "s", asm_opr), [(set RC:$dst, - (mov_frag RC:$src1, + (psnode RC:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB; def PDrm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), - [(set RC:$dst, (v2f64 (mov_frag RC:$src1, + [(set RC:$dst, (v2f64 (pdnode RC:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize; } let AddedComplexity = 20 in { - defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; + defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", + IIC_SSE_MOV_LH>, VEX_4V; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", - "\t{$src2, $dst|$dst, $src2}">; + defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", + "\t{$src2, $dst|$dst, $src2}", + IIC_SSE_MOV_LH>; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), - (iPTR 0))), addr:$dst)]>, VEX; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, VEX; def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>, VEX; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, VEX; def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 
(vector_extract (bc_v2f64 (v4f32 VR128:$src)), - (iPTR 0))), addr:$dst)]>; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>; def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>; let Predicates = [HasAVX] in { - let AddedComplexity = 20 in { - // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS - def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (VMOVLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (VMOVLPSrm VR128:$src1, addr:$src2)>; - // vector_shuffle v1, (load v2) <2, 1> using MOVLPS - def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (VMOVLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (VMOVLPDrm VR128:$src1, addr:$src2)>; - } - - // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS - def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (VMOVLPSmr addr:$src1, VR128:$src2)>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), - VR128:$src2)), addr:$src1), - (VMOVLPSmr addr:$src1, VR128:$src2)>; - - // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS - def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (VMOVLPDmr addr:$src1, VR128:$src2)>; - def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (VMOVLPDmr addr:$src1, VR128:$src2)>; - // Shuffle with VMOVLPS def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), (VMOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), (VMOVLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86Movlps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), - (VMOVLPSrm VR128:$src1, addr:$src2)>; // Shuffle with VMOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), (VMOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), (VMOVLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Movlpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (VMOVLPDrm VR128:$src1, addr:$src2)>; // Store patterns def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), @@ -1091,23 +1199,10 @@ let Predicates = [HasAVX] in { } let Predicates = [HasSSE1] in { - let AddedComplexity = 20 in { - // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS - def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>; - } - // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; - def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), - VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>; // Shuffle with MOVLPS def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), @@ -1115,9 +1210,6 @@ let Predicates = [HasSSE1] in { def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector 
(loadf64 addr:$src2))))), - (MOVLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(X86Movlps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), (MOVLPSrm VR128:$src1, addr:$src2)>; @@ -1132,28 +1224,11 @@ let Predicates = [HasSSE1] in { } let Predicates = [HasSSE2] in { - let AddedComplexity = 20 in { - // vector_shuffle v1, (load v2) <2, 1> using MOVLPS - def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>; - } - - // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS - def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; - def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>; - // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Movlpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVLPDrm VR128:$src1, addr:$src2)>; // Store patterns def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), @@ -1169,12 +1244,14 @@ let Predicates = [HasSSE2] in { //===----------------------------------------------------------------------===// let AddedComplexity = 20 in { - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; + defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", + IIC_SSE_MOV_LH>, VEX_4V; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", - "\t{$src2, $dst|$dst, $src2}">; + defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", + "\t{$src2, $dst|$dst, $src2}", + IIC_SSE_MOV_LH>; } // v2f64 extract element 1 is always custom lowered to unpack high to low @@ -1182,33 +1259,28 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (unpckh (bc_v2f64 (v4f32 VR128:$src)), - (undef)), (iPTR 0))), addr:$dst)]>, - VEX; + (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), + (bc_v2f64 (v4f32 VR128:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (unpckh VR128:$src, (undef))), - (iPTR 0))), addr:$dst)]>, - VEX; + (v2f64 (X86Unpckh VR128:$src, VR128:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (unpckh (bc_v2f64 (v4f32 VR128:$src)), - (undef)), (iPTR 0))), addr:$dst)]>; + (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), + (bc_v2f64 (v4f32 VR128:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract - (v2f64 (unpckh VR128:$src, (undef))), - (iPTR 0))), addr:$dst)]>; + (v2f64 (X86Unpckh VR128:$src, VR128:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; let Predicates = 
[HasAVX] in {
  // VMOVHPS patterns
-  def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
-            (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-  def : Pat<(X86Movlhps VR128:$src1,
-                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
-            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
@@ -1217,65 +1289,32 @@ let Predicates = [HasAVX] in {
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here, the problem
-  // is during lowering, where it's not possible to recognize the load fold because
-  // it has two uses through a bitcast. One use disappears at isel time and the
-  // fold opportunity reappears.
+  // is during lowering, where it's not possible to recognize the load fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                        (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
-
-  // FIXME: This should be matched by an X86Movhpd instead. Same as above
-  def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
-                        (scalar_to_vector (loadf64 addr:$src2)))),
-            (VMOVHPDrm VR128:$src1, addr:$src2)>;
-
-  // Store patterns
-  def : Pat<(store (f64 (vector_extract
-              (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
-            (VMOVHPSmr addr:$dst, VR128:$src)>;
-  def : Pat<(store (f64 (vector_extract
-              (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
-            (VMOVHPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE1] in {
  // MOVHPS patterns
-  def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
-            (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
-  def : Pat<(X86Movlhps VR128:$src1,
-                 (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
-            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
-
-  // Store patterns
-  def : Pat<(store (f64 (vector_extract
-              (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
-            (MOVHPSmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE2] in {
  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here, the problem
-  // is during lowering, where it's not possible to recognize the load fold because
-  // it has two uses through a bitcast. One use disappears at isel time and the
-  // fold opportunity reappears.
+  // is during lowering, where it's not possible to recognize the load fold
+  // because it has two uses through a bitcast. One use disappears at isel time
+  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                        (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
-
-  // FIXME: This should be matched by an X86Movhpd instead.
Same as above - def : Pat<(v2f64 (X86Movlhpd VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))), - (MOVHPDrm VR128:$src1, addr:$src2)>; - - // Store patterns - def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst), - (MOVHPDmr addr:$dst, VR128:$src)>; } //===----------------------------------------------------------------------===// @@ -1287,13 +1326,15 @@ let AddedComplexity = 20 in { (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>, + (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], + IIC_SSE_MOV_LH>, VEX_4V; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>, + (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], + IIC_SSE_MOV_LH>, VEX_4V; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { @@ -1301,86 +1342,36 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; + (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], + IIC_SSE_MOV_LH>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; + (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], + IIC_SSE_MOV_LH>; } let Predicates = [HasAVX] in { // MOVLHPS patterns - let AddedComplexity = 20 in { - def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; - def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; - - // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS - def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), - (VMOVLHPSrr VR128:$src1, VR128:$src2)>; - } - def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (VMOVLHPSrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (VMOVLHPSrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; // MOVHLPS patterns - let AddedComplexity = 20 in { - // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS - def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), - (VMOVHLPSrr VR128:$src1, VR128:$src2)>; - - // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS - def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), - (VMOVHLPSrr VR128:$src1, VR128:$src1)>; - def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), - (VMOVHLPSrr VR128:$src1, VR128:$src1)>; - } - - def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), - (VMOVHLPSrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), (VMOVHLPSrr VR128:$src1, VR128:$src2)>; } let Predicates = [HasSSE1] in { // MOVLHPS patterns - let AddedComplexity = 20 in { - def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; - def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; - - // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS - def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, VR128:$src2)>; - } - def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)), - (MOVLHPSrr VR128:$src1, 
VR128:$src2)>; def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; // MOVHLPS patterns - let AddedComplexity = 20 in { - // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS - def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; - - // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS - def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; - def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), - (MOVHLPSrr VR128:$src1, VR128:$src1)>; - } - - def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)), - (MOVHLPSrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), (MOVHLPSrr VR128:$src1, VR128:$src2)>; } @@ -1389,70 +1380,97 @@ let Predicates = [HasSSE1] in { // SSE 1 & 2 - Conversion Instructions //===----------------------------------------------------------------------===// +def SSE_CVT_PD : OpndItins< + IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM +>; + +def SSE_CVT_PS : OpndItins< + IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM +>; + +def SSE_CVT_Scalar : OpndItins< + IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM +>; + +def SSE_CVT_SS2SI_32 : OpndItins< + IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM +>; + +def SSE_CVT_SS2SI_64 : OpndItins< + IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM +>; + +def SSE_CVT_SD2SI : OpndItins< + IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM +>; + multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm> { + string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))]>; + [(set DstRC:$dst, (OpNode SrcRC:$src))], + itins.rr>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>; -} - -multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>; - let mayLoad = 1 in - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>; + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], + itins.rm>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { + string asm, Domain d, OpndItins itins> { def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))], d>; + [(set DstRC:$dst, (OpNode SrcRC:$src))], + itins.rr, d>; def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>; + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], + itins.rm, d>; } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { +let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; +} // neverHasSideEffects = 1 } defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, - 
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_LIG; + "cvttss2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SS2SI_32>, + XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX, - VEX_W, VEX_LIG; + "cvttss2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, - VEX_LIG; + "cvttsd2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SD2SI>, + XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, - VEX, VEX_W, VEX_LIG; + "cvttsd2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SD2SI>, + XD, VEX, VEX_W, VEX_LIG; // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. -defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS, - VEX_4V, VEX_LIG; -defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, - VEX_4V, VEX_W, VEX_LIG; -defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, - VEX_4V, VEX_LIG; -defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, - VEX_4V, VEX_LIG; -defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, - VEX_4V, VEX_W, VEX_LIG; - -let Predicates = [HasAVX] in { +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, + XS, VEX_4V, VEX_LIG; +defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, + XS, VEX_4V, VEX_W, VEX_LIG; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, + XD, VEX_4V, VEX_LIG; +defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, + XD, VEX_4V, VEX_LIG; +defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, + XD, VEX_4V, VEX_W, VEX_LIG; + +let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), @@ -1473,169 +1491,185 @@ let Predicates = [HasAVX] in { } defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}">, XS; + "cvttss2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SS2SI_32>, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; + "cvttss2si{q}\t{$src, $dst|$dst, $src}", + SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; + "cvttsd2si\t{$src, $dst|$dst, $src}", + SSE_CVT_SD2SI>, XD; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W; + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + SSE_CVT_SD2SI>, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, - "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; + "cvtsi2ss\t{$src, $dst|$dst, $src}", + SSE_CVT_Scalar>, XS; defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, - "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W; + "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", + SSE_CVT_Scalar>, XS, REX_W; defm CVTSI2SD : 
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_Scalar>, XD;
 defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
-                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_Scalar>, XD, REX_W;

 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
 // and/or XMM operand(s).

 multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
-                         string asm> {
+                         string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set DstRC:$dst, (Int SrcRC:$src))]>;
+             [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+             [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
 }

 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
-                   PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+                   PatFrag ld_frag, string asm, OpndItins itins,
+                   bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-             [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+             [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+             itins.rr>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-             [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+             [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+             itins.rm>;
 }

-defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
-                      f128mem, load, "cvtsd2si">, XD, VEX;
-defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
-                      int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
-                      XD, VEX, VEX_W;
-
-// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
-// Get rid of this hack or rename the intrinsics; there are several
-// instructions that only match with the intrinsic form, why create duplicates
-// to let them be recognized by the assembler?
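// (Context for the FIXME above, a hedged sketch with a made-up mnemonic FOO
// and opcode 0x99: the plain defs pattern-match generic nodes on scalar
// register classes, while the Int_ defs match the intrinsics, which keep
// whole-XMM semantics so the upper elements stay visible. Schematically:
//
//   defm FOO     : sse12_cvt_s<0x99, FR64, GR32, fp_to_sint, f64mem, loadf64,
//                              "foo\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>;
//   defm Int_FOO : sse12_cvt_sint<0x99, VR128, GR32, int_x86_sse2_cvtsd2si,
//                                 f128mem, load, "foo", SSE_CVT_SD2SI>;
//
// Both emit the same encoding; the asm matcher ignores the Int_ names so only
// one mnemonic is user-visible, which is the hack the FIXME complains about.)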
-defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG; -defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W, - VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; +defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si", + SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si{l}">, XD; + f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si{q}">, XD, REX_W; + f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", + SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", + SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", + SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", + SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss">, XS; + "cvtsi2ss", SSE_CVT_Scalar>, XS; defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, - "cvtsi2ss{q}">, XS, REX_W; + "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2sd">, XD; + "cvtsi2sd", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd">, XD, REX_W; + "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W; } /// SSE 1 Only // Aliases for intrinsics defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si">, XS, VEX; + f32mem, load, "cvttss2si", + SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si">, XS, VEX, VEX_W; + "cvttss2si", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si">, XD, VEX; + f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, + XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si">, XD, VEX, VEX_W; + "cvttsd2si", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si">, XS; + f32mem, load, "cvttss2si", + SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si{q}">, XS, REX_W; + "cvttss2si{q}", SSE_CVT_SS2SI_64>, + XS, 
REX_W;
 defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                      f128mem, load, "cvttsd2si">, XD;
+                      f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
+                      XD;
 defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                        int_x86_sse2_cvttsd2si64, f128mem, load,
-                       "cvttsd2si{q}">, XD, REX_W;
+                       "cvttsd2si{q}", SSE_CVT_SD2SI>,
+                       XD, REX_W;

 let Pattern = []<dag> in {
 defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
-                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
-                      VEX, VEX_LIG;
+                      "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
 defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
-                      "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
-                      VEX_W, VEX_LIG;
+                      "cvtss2si\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
+                      SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
 defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
+                      SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
 }

 let Pattern = []<dag> in {
 defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
-                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+                      "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_32>, XS;
 defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
-                      "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+                      "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+                      SSE_CVT_SS2SI_64>, XS, REX_W;
 defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB; /* PD SSE3 form is available */
+                      SSEPackedSingle, SSE_CVT_PS>,
+                      TB; /* PD SSE3 form is available */
 }

-let Predicates = [HasSSE1] in {
+let Predicates = [HasAVX] in {
 def : Pat<(int_x86_sse_cvtss2si VR128:$src),
-          (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+          (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
-          (CVTSS2SIrm addr:$src)>;
+          (VCVTSS2SIrm addr:$src)>;
 def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
-          (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+          (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
-          (CVTSS2SI64rm addr:$src)>;
+          (VCVTSS2SI64rm addr:$src)>;
 }

-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE1] in {
 def : Pat<(int_x86_sse_cvtss2si VR128:$src),
-          (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+          (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
-          (VCVTSS2SIrm addr:$src)>;
+          (CVTSS2SIrm addr:$src)>;
 def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
-          (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+          (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
-          (VCVTSS2SI64rm addr:$src)>;
+          (CVTSS2SI64rm addr:$src)>;
 }

 /// SSE 2 Only
@@ -1643,43 +1677,51 @@ let Predicates = [HasAVX] in {
 // Convert scalar double to scalar single
 def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
-                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                      VEX_4V, VEX_LIG;
+                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
 let mayLoad = 1 in
 def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins
FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + [], IIC_SSE_CVT_Scalar_RM>, + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (fround FR64:$src))]>; + [(set FR32:$dst, (fround FR64:$src))], + IIC_SSE_CVT_Scalar_RR>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD, + [(set FR32:$dst, (fround (loadf64 addr:$src)))], + IIC_SSE_CVT_Scalar_RM>, + XD, Requires<[HasSSE2, OptForSize]>; defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>, + int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", + SSE_CVT_Scalar, 0>, XS, VEX_4V; let Constraints = "$src1 = $dst" in defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS; + int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", + SSE_CVT_Scalar>, XS; // Convert scalar single to scalar double // SSE2 instructions with XS prefix def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + [], IIC_SSE_CVT_Scalar_RR>, + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + [], IIC_SSE_CVT_Scalar_RM>, + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; let Predicates = [HasAVX] in { def : Pat<(f64 (fextend FR32:$src)), @@ -1696,11 +1738,13 @@ def : Pat<(extloadf32 addr:$src), def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (fextend FR32:$src))]>, XS, + [(set FR64:$dst, (fextend FR32:$src))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, + [(set FR64:$dst, (extloadf32 addr:$src))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2, OptForSize]>; // extload f32 -> f64. 
This matches load+fextend because we have a hack in @@ -1717,26 +1761,30 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))]>, XS, VEX_4V, + VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))]>, XS, VEX_4V, + (load addr:$src2)))], + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))]>, XS, + VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))]>, XS, + (load addr:$src2)))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; } @@ -1744,216 +1792,275 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, // SSE2 instructions without OpSize prefix def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], + IIC_SSE_CVT_PS_RR>, TB, VEX, Requires<[HasAVX]>; def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vcvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))]>, + (bitconvert (memopv2i64 addr:$src))))], + IIC_SSE_CVT_PS_RM>, TB, VEX, Requires<[HasAVX]>; def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], + IIC_SSE_CVT_PS_RR>, TB, Requires<[HasSSE2]>; def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "cvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))]>, + (bitconvert (memopv2i64 addr:$src))))], + IIC_SSE_CVT_PS_RM>, TB, Requires<[HasSSE2]>; // FIXME: why the non-intrinsic version is described as SSE3? 
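// (For reference, a minimal sketch of the prefix classes threaded through
// these defs, assuming the standard X86InstrFormats.td meanings: TB marks the
// plain 0F opcode map, OpSize the 66-prefixed one, XS the F3-prefixed one and
// XD the F2-prefixed one. So a hypothetical definition
//
//   def BAZrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
//                 "baz\t{$src, $dst|$dst, $src}", []>, XS;
//
// encodes as F3 0F E6 /r -- the cvtdq2pd encoding matched by the Int_*CVTDQ2PD
// defs below, which is why these SSE2 conversions carry the XS prefix even
// though F3 elsewhere marks the scalar-single family.)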
// SSE2 instructions with XS prefix def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], + IIC_SSE_CVT_PD_RR>, XS, VEX, Requires<[HasAVX]>; def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))]>, + (bitconvert (memopv2i64 addr:$src))))], + IIC_SSE_CVT_PD_RM>, XS, VEX, Requires<[HasAVX]>; def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], + IIC_SSE_CVT_PD_RR>, XS, Requires<[HasSSE2]>; def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))]>, + (bitconvert (memopv2i64 addr:$src))))], + IIC_SSE_CVT_PD_RM>, XS, Requires<[HasSSE2]>; // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RM>, VEX; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RM>, VEX; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RR>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>; + "cvtps2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PS_RM>; def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))]>, VEX; + (memop addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>; + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))]>; + (memop addr:$src)))], + IIC_SSE_CVT_PS_RM>; // SSE2 packed instructions with XD prefix def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set 
VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>, XD, VEX, Requires<[HasAVX]>; def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))]>, + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>, XD, VEX, Requires<[HasAVX]>; def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>, XD, Requires<[HasSSE2]>; def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))]>, + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>, XD, Requires<[HasSSE2]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -let mayLoad = 1 in + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memop addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -let mayLoad = 1 in + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + IIC_SSE_CVT_PS_RR>, VEX; def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 + (memopv8f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; + def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))]>; + (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; - -def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))]>, - XS, VEX, Requires<[HasAVX]>; -def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))]>, - XS, VEX, Requires<[HasAVX]>; - -let Predicates = [HasSSE2] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>; -} + (int_x86_sse2_cvttps2dq (memop addr:$src)))], + IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (Int_VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (Int_VCVTDQ2PSrm addr:$src)>; + def : Pat<(v4i32 (fp_to_sint (v4f32 
VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), + (VCVTTPS2DQrm addr:$src)>; + def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))), (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))), + (VCVTDQ2PSYrm addr:$src)>; + def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), (VCVTTPS2DQYrr VR256:$src)>; + def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))), + (VCVTTPS2DQYrm addr:$src)>; +} + +let Predicates = [HasSSE2] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (Int_CVTDQ2PSrr VR128:$src)>; + def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (Int_CVTDQ2PSrm addr:$src)>; + + def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), + (CVTTPS2DQrr VR128:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), + (CVTTPS2DQrm addr:$src)>; } def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX; + (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>, VEX; let isCodeGenOnly = 1 in def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>, VEX; + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>, VEX; def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>; + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
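The comment above is easiest to see at the assembler level: with a register source the operand itself tells the assembler which width is meant, but a memory operand carries no size information, so explicit x/y-suffixed mnemonics are kept for the memory forms. An illustrative AT&T-syntax sketch, not taken from the patch:

  vcvttpd2dq  %xmm1, %xmm0     # xmm source implies the 128-bit form
  vcvttpd2dq  %ymm1, %xmm0     # ymm source implies the 256-bit form
  vcvttpd2dqx (%rax), %xmm0    # memory source: "x" suffix selects 128-bit
  vcvttpd2dqy (%rax), %xmm0    # memory source: "y" suffix selects 256-bit

The defs that follow provide both the plain-mnemonic register forms and the suffixed memory forms; their pattern lists are empty because they exist only for the assembler.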
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; // XMM only def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; + "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, VEX, VEX_L; // Convert packed single to packed double let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX; + "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, TB, VEX; } def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; + "cvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, TB; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; + "cvtps2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, TB; def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], + IIC_SSE_CVT_PD_RR>, TB, VEX, Requires<[HasAVX]>; def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))]>, + (load addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB, VEX, Requires<[HasAVX]>; def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], + IIC_SSE_CVT_PD_RR>, TB, Requires<[HasSSE2]>; def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))]>, + (load addr:$src)))], + IIC_SSE_CVT_PD_RM>, TB, Requires<[HasSSE2]>; // Convert packed double to packed single @@ -1961,42 +2068,54 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs 
VR128:$dst), (ins f64mem:$src), // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; // XMM only def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; + "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; + "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; + "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], + IIC_SSE_CVT_PD_RR>; def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))]>; + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>; def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], + IIC_SSE_CVT_PD_RR>; def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))]>; + (memop addr:$src)))], + IIC_SSE_CVT_PD_RM>; // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below @@ -2026,11 +2145,6 @@ def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), (VCVTTPD2DQYrm addr:$src)>; -def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src), - (VCVTTPS2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTTPS2DQYrm addr:$src)>; - // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; @@ -2049,69 +2163,84 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, 
SDNode OpNode, ValueType VT, PatFrag ld_frag, - string asm, string asm_alt> { + string asm, string asm_alt, + OpndItins itins> { def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, - [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>; + [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], + itins.rr>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))]>; + (ld_frag addr:$src2), imm:$cc))], + itins.rm>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>; + (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], + IIC_SSE_ALU_F32S_RR>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>; + (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], + IIC_SSE_ALU_F32S_RM>; } } defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSE_ALU_F32S>, XS, VEX_4V, VEX_LIG; defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, + "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", + SSE_ALU_F32S>, // same latency as 32 bit compare XD, VEX_4V, VEX_LIG; let Constraints = "$src1 = $dst" in { defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32, "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">, + "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>, XS; defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64, "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">, + "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", + SSE_ALU_F32S>, // same latency as 32 bit compare XD; } multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, - Intrinsic Int, string asm> { + Intrinsic Int, string asm, OpndItins itins> { def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, - VR128:$src, imm:$cc))]>; + VR128:$src, imm:$cc))], + itins.rr>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm, + (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, - (load addr:$src), imm:$cc))]>; + (load addr:$src), imm:$cc))], + itins.rm>; } // Aliases to match intrinsics which expect XMM operand(s). 
defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, - "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">, + "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", + SSE_ALU_F32S>, XS, VEX_4V; defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, - "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">, + "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", + SSE_ALU_F32S>, // same latency as f32 XD, VEX_4V; let Constraints = "$src1 = $dst" in { defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, - "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS; + "cmp${cc}ss\t{$src, $dst|$dst, $src}", + SSE_ALU_F32S>, XS; defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, - "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD; + "cmp${cc}sd\t{$src, $dst|$dst, $src}", + SSE_ALU_F32S>, // same latency as f32 + XD; } @@ -2121,11 +2250,13 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, PatFrag ld_frag, string OpcodeStr, Domain d> { def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), - [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>; + [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], + IIC_SSE_COMIS_RR, d>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), - (ld_frag addr:$src2)))], d>; + (ld_frag addr:$src2)))], + IIC_SSE_COMIS_RM, d>; } let Defs = [EFLAGS] in { @@ -2182,19 +2313,21 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, let isAsmParserOnly = 1 in { def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>; + [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], + IIC_SSE_CMPP_RR, d>; def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>; + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], + IIC_SSE_CMPP_RM, d>; } // Accept explicit immediate argument form instead of comparison code. 
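The *_alt defs referenced by the comment exist because a compare can be written two ways: the condition can be baked into the mnemonic (the SSECC operand, printed via cmp${cc}), or passed as a literal byte, which is the only way to spell predicates that have no mnemonic name (AVX extends the immediate range well past the eight classic conditions). A hedged AT&T-syntax illustration, not taken from the patch:

  cmpltsd %xmm1, %xmm0        # condition "lt" folded into the mnemonic
  cmpsd   $1, %xmm1, %xmm0    # same operation, explicit i8 immediate

The rri_alt/rmi_alt defs below accept the second spelling; they carry empty pattern lists because instruction selection always goes through the SSECC forms.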
def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>; def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc), - asm_alt, [], d>; + (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + asm_alt, [], IIC_SSE_CMPP_RM, d>; } defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, @@ -2224,40 +2357,40 @@ let Constraints = "$src1 = $dst" in { SSEPackedDouble>, TB, OpSize; } -let Predicates = [HasSSE1] in { -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; -} - -let Predicates = [HasSSE2] in { -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; -} - let Predicates = [HasAVX] in { -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), +def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), +def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), +def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; -def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), +def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; -def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), +def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>; -def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)), +def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)), (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>; -def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), +def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>; -def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), +def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; } +let Predicates = [HasSSE1] in { +def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +} + +let Predicates = [HasSSE2] in { +def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Shuffle Instructions 
//===----------------------------------------------------------------------===// @@ -2267,14 +2400,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, ValueType vt, string asm, PatFrag mem_frag, Domain d, bit IsConvertibleToThreeAddress = 0> { def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm, - [(set RC:$dst, (vt (shufp:$src3 - RC:$src1, (mem_frag addr:$src2))))], d>; + (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, + [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, - [(set RC:$dst, - (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>; + [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2297,133 +2430,52 @@ let Constraints = "$src1 = $dst" in { TB; defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", - memopv2f64, SSEPackedDouble>, TB, OpSize; -} - -let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufps VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), - (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but - // fall back to this for SSE1) - def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPSrri case. - def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; -} - -let Predicates = [HasSSE2] in { - // Special binary v4i32 shuffle cases with SHUFPS. - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPDrri cases. - def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special binary v2i64 shuffle cases using SHUFPDrri. 
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Generic SHUFPD patterns - def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), - (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; + memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, + TB, OpSize; } let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Shufps VR128:$src1, - (memopv4f32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufps VR128:$src1, + def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))), + def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; - // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but - // fall back to this for SSE1) - def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (VSHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPSrri case. - def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), - (VSHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special binary v4i32 shuffle cases with SHUFPS. - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (VSHUFPSrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v4i32 (shufp:$src3 VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (VSHUFPSrmi VR128:$src1, addr:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special unary SHUFPDrri cases. - def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (VSHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (VSHUFPDrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - // Special binary v2i64 shuffle cases using SHUFPDrri. 
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (VSHUFPDrri VR128:$src1, VR128:$src2, - (SHUFFLE_get_shuf_imm VR128:$src3))>; - - def : Pat<(v2f64 (X86Shufps VR128:$src1, - (memopv2f64 addr:$src2), (i8 imm:$imm))), + + def : Pat<(v2i64 (X86Shufp VR128:$src1, + (memopv2i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; - def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))), + def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; // 256-bit patterns - def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), + def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; - def : Pat<(v8i32 (X86Shufps VR256:$src1, + def : Pat<(v8i32 (X86Shufp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; - def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; - def : Pat<(v8f32 (X86Shufps VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; - - def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), + def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; - def : Pat<(v4i64 (X86Shufpd VR256:$src1, + def : Pat<(v4i64 (X86Shufp VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$imm))), (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; +} - def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; - def : Pat<(v4f64 (X86Shufpd VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; +let Predicates = [HasSSE1] in { + def : Pat<(v4i32 (X86Shufp VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), + (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; +} + +let Predicates = [HasSSE2] in { + // Generic SHUFPD patterns + def : Pat<(v2i64 (X86Shufp VR128:$src1, + (memopv2i64 addr:$src2), (i8 imm:$imm))), + (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; + def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; } //===----------------------------------------------------------------------===// @@ -2431,143 +2483,80 @@ let Predicates = [HasAVX] in { //===----------------------------------------------------------------------===// /// sse12_unpack_interleave - sse 1 & 2 unpack and interleave -multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, +multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, PatFrag mem_frag, RegisterClass RC, X86MemOperand x86memop, string asm, Domain d> { def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, - (vt (OpNode RC:$src1, RC:$src2)))], d>; + (vt (OpNode RC:$src1, RC:$src2)))], + IIC_SSE_UNPCK, d>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, - (mem_frag addr:$src2))))], d>; -} - -let AddedComplexity = 10 in { - defm VUNPCKHPS: 
sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; - defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; - defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; - defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; - - defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, - VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; - defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, - VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; - defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, - VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, TB, VEX_4V; - defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, - VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, TB, OpSize, VEX_4V; - - let Constraints = "$src1 = $dst" in { - defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; - defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; - defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; - defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, TB, OpSize; - } // Constraints = "$src1 = $dst" -} // AddedComplexity + (mem_frag addr:$src2))))], + IIC_SSE_UNPCK, d>; +} + +defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, TB, VEX_4V; +defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, TB, OpSize, VEX_4V; +defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, TB, VEX_4V; +defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, TB, OpSize, VEX_4V; + +defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32, + VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, TB, VEX_4V; +defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64, + VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, TB, OpSize, VEX_4V; +defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32, + VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, TB, VEX_4V; +defm VUNPCKLPDY: 
sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64, + VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, TB, OpSize, VEX_4V; -let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), - (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), - (UNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), - (UNPCKHPSrr VR128:$src1, VR128:$src2)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), - (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), - (UNPCKHPDrr VR128:$src1, VR128:$src2)>; +let Constraints = "$src1 = $dst" in { + defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; +} // Constraints = "$src1 = $dst" +let Predicates = [HasAVX], AddedComplexity = 1 in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. 
def : Pat<(v2f64 (X86Movddup VR128:$src)), - (UNPCKLPDrr VR128:$src, VR128:$src)>; - - let AddedComplexity = 10 in - def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), - (UNPCKLPDrr VR128:$src, VR128:$src)>; + (VUNPCKLPDrr VR128:$src, VR128:$src)>; } -let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), - (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), - (VUNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), - (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - - def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), - (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), - (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - - def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - +let Predicates = [HasSSE2] in { // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. 
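To make the FIXME concrete: unpcklpd of a register against itself duplicates the low double, which is exactly the movddup result, so lowering would ideally emit the generic X86Unpckl node and let a pattern like the sketch below do the selection. This pattern is illustrative only, not part of the patch; the bitcast double-use problem described above is what forces the X86Movddup stopgap instead:

// What the FIXME asks for (hypothetical):
def : Pat<(v2f64 (X86Unpckl VR128:$src, VR128:$src)),
          (UNPCKLPDrr VR128:$src, VR128:$src)>;

Until then, the pattern that follows maps X86Movddup onto UNPCKLPDrr directly for the SSE2-only case, mirroring the VUNPCKLPDrr mapping in the AVX block above.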
def : Pat<(v2f64 (X86Movddup VR128:$src)), - (VUNPCKLPDrr VR128:$src, VR128:$src)>; - let AddedComplexity = 10 in - def : Pat<(splat_lo (v2f64 VR128:$src), (undef)), - (VUNPCKLPDrr VR128:$src, VR128:$src)>; + (UNPCKLPDrr VR128:$src, VR128:$src)>; } //===----------------------------------------------------------------------===// @@ -2579,29 +2568,12 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], + IIC_SSE_MOVMSK, d>, REX_W; } -defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", - SSEPackedSingle>, TB; -defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", - SSEPackedDouble>, TB, OpSize; - -def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; -def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; -def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; -def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; - let Predicates = [HasAVX] in { defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", SSEPackedSingle>, TB, VEX; @@ -2629,17 +2601,105 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, + SSEPackedSingle>, TB, VEX; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, + SSEPackedDouble>, TB, OpSize, VEX; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX; + "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, + SSEPackedSingle>, TB, VEX; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB, + "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, + SSEPackedDouble>, TB, OpSize, VEX; } +defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", + SSEPackedSingle>, TB; +defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", + SSEPackedDouble>, TB, OpSize; + +def : Pat<(i32 (X86fgetsign FR32:$src)), + (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i64 (X86fgetsign FR32:$src)), + (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, + sub_ss))>, Requires<[HasSSE1]>; +def : Pat<(i32 (X86fgetsign FR64:$src)), + (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; +def : Pat<(i64 (X86fgetsign FR64:$src)), + (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), 
FR64:$src, + sub_sd))>, Requires<[HasSSE2]>; + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Logical Instructions +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { // SSE integer instructions + +/// PDI_binop_rm - Simple SSE2 binary operator. +multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, + OpndItins itins, + bit IsCommutable = 0, + bit Is2Addr = 1> { + let isCommutable = IsCommutable in + def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>; +} +} // ExeDomain = SSEPackedInt + +// These are ordered here for pattern ordering requirements with the fp versions + +let Predicates = [HasAVX] in { +defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { +defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1>; +defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1>; +defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 1>; +defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, SSE_BIT_ITINS_P, 0>; +} // Constraints = "$src1 = $dst" + +let Predicates = [HasAVX2] in { +defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, + i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; +defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, + i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions //===----------------------------------------------------------------------===// @@ -2647,31 +2707,39 @@ let Predicates = [HasAVX] in { /// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops /// multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, - FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V; + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>, + TB, VEX_4V; defm V#NAME#PD : 
sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, - FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V; + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>, + TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, - f32, f128mem, memopfsf32, SSEPackedSingle>, TB; + f32, f128mem, memopfsf32, SSEPackedSingle, itins>, + TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, - f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize; + f64, f128mem, memopfsf64, SSEPackedDouble, itins>, + TB, OpSize; } } // Alias bitwise logical operations using SSE logical ops on packed FP values. let mayLoad = 0 in { - defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>; - defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>; - defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>; + defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, + SSE_BIT_ITINS_P>; + defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, + SSE_BIT_ITINS_P>; + defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, + SSE_BIT_ITINS_P>; } let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in - defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>; + defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, + SSE_BIT_ITINS_P>; /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// @@ -2758,118 +2826,145 @@ let isCommutable = 0 in /// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those /// classes below multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, + SizeItins itins, bit Is2Addr = 1> { defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), - OpNode, FR32, f32mem, Is2Addr>, XS; + OpNode, FR32, f32mem, + itins.s, Is2Addr>, XS; defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), - OpNode, FR64, f64mem, Is2Addr>, XD; + OpNode, FR64, f64mem, + itins.d, Is2Addr>, XD; } multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, + SizeItins itins, bit Is2Addr = 1> { let mayLoad = 0 in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, - v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; + v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>, + TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, - v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; + v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>, + TB, OpSize; } } multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, + SizeItins itins> { let mayLoad = 0 in { defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, - v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB; + v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>, + TB; defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, - v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize; + v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>, + TB, OpSize; } } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, + SizeItins itins, bit Is2Addr = 1> { defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; + !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, + itins.s, Is2Addr>, XS; defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, - 
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+                 !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+                 itins.d, Is2Addr>, XD;
 }
 
 multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+                                      SizeItins itins,
                                       bit Is2Addr = 1> {
   defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
              !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
-             SSEPackedSingle, Is2Addr>, TB;
+             SSEPackedSingle, itins.s, Is2Addr>,
+             TB;
 
   defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
              !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
-             SSEPackedDouble, Is2Addr>, TB, OpSize;
+             SSEPackedDouble, itins.d, Is2Addr>,
+             TB, OpSize;
 }
 
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
+                                        SizeItins itins> {
   defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
               !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem,
               memopv8f32,
-              SSEPackedSingle, 0>, TB;
+              SSEPackedSingle, itins.s, 0>, TB;
 
   defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
               !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem,
               memopv4f64,
-              SSEPackedDouble, 0>, TB, OpSize;
+              SSEPackedDouble, itins.d, 0>, TB, OpSize;
 }
 
 // Binary Arithmetic instructions
-defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
-            basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
-            basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
-defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
-            basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
-            basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
+            basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
+            VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
+            basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+            VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
+            basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
+            VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
+            basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+            VEX_4V;
 
 let isCommutable = 0 in {
-  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
-              basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
-  defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
-              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
-  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
-              basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
-  defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
-              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
-  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
-              basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
-  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
-              basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
-              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
-              basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
-  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
-              basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
-  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
-              basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
-              basic_sse12_fp_binop_p_y_int<0x5D, "min">,
-              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
+              basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
+              VEX_4V, VEX_LIG;
+  defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
+              basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
+              VEX_4V, VEX_LIG;
+  defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+              VEX_4V;
+  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
+              basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
+              VEX_4V, VEX_LIG;
+  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+              basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
+              VEX_4V;
+  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
+              basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
+              VEX_4V, VEX_LIG;
+  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
+              basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
+              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+              VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
-  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
-             basic_sse12_fp_binop_p<0x58, "add", fadd>,
-             basic_sse12_fp_binop_s_int<0x58, "add">;
-  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
-             basic_sse12_fp_binop_p<0x59, "mul", fmul>,
-             basic_sse12_fp_binop_s_int<0x59, "mul">;
+  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+             basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+             basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+             basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+             basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
 
   let isCommutable = 0 in {
-    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
-               basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
-               basic_sse12_fp_binop_s_int<0x5C, "sub">;
-    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
-               basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
-               basic_sse12_fp_binop_s_int<0x5E, "div">;
-    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
-               basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
-               basic_sse12_fp_binop_s_int<0x5F, "max">,
-               basic_sse12_fp_binop_p_int<0x5F, "max">;
-    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
-               basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
-               basic_sse12_fp_binop_s_int<0x5D, "min">,
-               basic_sse12_fp_binop_p_int<0x5D, "min">;
+    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+               basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+               basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+               basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+               basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+               basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+               basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
+               basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+               basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+               basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
+               basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
   }
 }
@@ -2881,9 +2976,25 @@ let Constraints = "$src1 = $dst" in {
 ///
 /// And, we have a special variant form for a full-vector intrinsic form.
 
+def SSE_SQRTP : OpndItins<
+  IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
+>;
+
+def SSE_SQRTS : OpndItins<
+  IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
+>;
+
+def SSE_RCPP : OpndItins<
+  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
+>;
+
+def SSE_RCPS : OpndItins<
+  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
+>;
+
 /// sse1_fp_unop_s - SSE1 unops in scalar form.
 multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
-                          SDNode OpNode, Intrinsic F32Int> {
+                          SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
   def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode FR32:$src))]>;
@@ -2893,14 +3004,14 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
   // partial register update condition.
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
               !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-              [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
             Requires<[HasSSE1, OptForSize]>;
   def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int VR128:$src))]>;
+                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
   def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                     !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
 }
 
 /// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
@@ -2919,64 +3030,72 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
 }
 
 /// sse1_fp_unop_p - SSE1 unops in packed form.
-multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                          OpndItins itins> {
   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+                [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
   def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
 }
 
 /// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            OpndItins itins> {
   def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                  !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+                 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+                 itins.rr>;
   def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                  !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                 [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+                 [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+                 itins.rm>;
 }
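The itinerary arguments threaded through all of these multiclasses are plain carrier records whose definitions sit outside this hunk. A minimal sketch consistent with the .rr/.rm and .s/.d field accesses used above (the field names are taken from those accesses; the class shapes themselves are assumed, not quoted from the patch):

    // Pairs the reg-reg and reg-mem itinerary classes of one opcode.
    class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
      InstrItinClass rr = arg_rr;
      InstrItinClass rm = arg_rm;
    }

    // Pairs the single- and double-precision OpndItins of one opcode.
    class SizeItins<OpndItins arg_s, OpndItins arg_d> {
      OpndItins s = arg_s;
      OpndItins d = arg_d;
    }

With that shape, something like SSE_ALU_ITINS_P.s.rr is what basic_sse12_fp_binop_p ultimately hands to the "ps" reg-reg instruction.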
 /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
 multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
-                              Intrinsic V4F32Int> {
+                              Intrinsic V4F32Int, OpndItins itins> {
   def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+                    [(set VR128:$dst, (V4F32Int VR128:$src))],
+                    itins.rr>;
   def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+                    itins.rm>;
 }
 
 /// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
 multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
-                                Intrinsic V4F32Int> {
+                                Intrinsic V4F32Int, OpndItins itins> {
   def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                     [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+                     [(set VR256:$dst, (V4F32Int VR256:$src))],
+                     itins.rr>;
   def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                      !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
-                     [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+                     [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
+                     itins.rm>;
 }
 
 /// sse2_fp_unop_s - SSE2 unops in scalar form.
 multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
-                          SDNode OpNode, Intrinsic F64Int> {
+                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
   def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                 !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                [(set FR64:$dst, (OpNode FR64:$src))]>;
+                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
   // See the comments in sse1_fp_unop_s for why this is OptForSize.
   def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
               !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-              [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+              [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
             Requires<[HasSSE2, OptForSize]>;
   def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int VR128:$src))]>;
+                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
   def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                     !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
 }
 
 /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
@@ -2998,45 +3117,52 @@ multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
 
 /// sse2_fp_unop_p - SSE2 unops in vector forms.
 multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
-                          SDNode OpNode> {
+                          SDNode OpNode, OpndItins itins> {
   def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
   def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
 }
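To see concretely what the new parameter buys, it helps to expand one packed instantiation by hand: passing SSE_SQRTP (defined in the hunk above) for itins makes itins.rr resolve to IIC_SSE_SQRTP_RR and itins.rm to IIC_SSE_SQRTP_RM, so the register and memory forms of the same opcode land in distinct scheduling classes. A hand-expanded sketch of sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP> (the bodies follow the multiclass above; the expansion itself is illustrative, not quoted from TableGen output):

    def SQRTPSr : PSI<0x51, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "sqrtps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))],
                      IIC_SSE_SQRTP_RR>;  // from itins.rr
    def SQRTPSm : PSI<0x51, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "sqrtps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (fsqrt (memopv4f32 addr:$src)))],
                      IIC_SSE_SQRTP_RM>;  // from itins.rm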
 /// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            OpndItins itins> {
   def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                  !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+                 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+                 itins.rr>;
   def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                  !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                 [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+                 [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+                 itins.rm>;
 }
 
 /// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
 multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
-                              Intrinsic V2F64Int> {
+                              Intrinsic V2F64Int, OpndItins itins> {
   def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+                    [(set VR128:$dst, (V2F64Int VR128:$src))],
+                    itins.rr>;
   def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
+                    itins.rm>;
 }
 
 /// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
 multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
-                                Intrinsic V2F64Int> {
+                                Intrinsic V2F64Int, OpndItins itins> {
   def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                     [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+                     [(set VR256:$dst, (V2F64Int VR256:$src))],
+                     itins.rr>;
   def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                      !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
-                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+                     [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
+                     itins.rm>;
 }
 
 let Predicates = [HasAVX] in {
@@ -3044,31 +3170,40 @@ let Predicates = [HasAVX] in {
   defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
                 sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
 
-  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
-                sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
-                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
-                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
-                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
-                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
-                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
-                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+                sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
+                                   SSE_SQRTP>,
+                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
+                                   SSE_SQRTP>,
+                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
+                                     SSE_SQRTP>,
+                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
+                                     SSE_SQRTP>,
                 VEX;
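One TableGen mechanism doing quiet heavy lifting in the defms above and below: a single defm may inherit from several multiclasses at once, and the records of every parent are emitted with the defm name prefixed. A tiny self-contained illustration (toy classes, nothing from this file):

    class Inst<string n> { string AsmName = n; }
    multiclass ScalarForms<string op> { def SSr : Inst<!strconcat(op, "ss")>; }
    multiclass PackedForms<string op> { def PSr : Inst<!strconcat(op, "ps")>; }
    // One defm, two parents: emits records VSQRTSSr and VSQRTPSr.
    defm VSQRT : ScalarForms<"vsqrt">, PackedForms<"vsqrt">;

That is why the one defm VSQRT above can define scalar, packed, 128-bit, and 256-bit variants in a single declaration.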
   // Reciprocal approximations. Note that these typically require refinement
   // in order to obtain suitable precision.
   defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
-  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
-                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
-                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
-                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
+                                     SSE_SQRTP>,
+                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
+                                   SSE_SQRTP>, VEX;
 
   defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
-  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
-                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
-                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
-                sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
+                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
+                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
+                                     SSE_RCPP>,
+                sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
+                                   SSE_RCPP>, VEX;
 }
 
+let AddedComplexity = 1 in {
 def : Pat<(f32 (fsqrt FR32:$src)),
           (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
 def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3091,8 +3226,9 @@ def : Pat<(f32 (X86frcp FR32:$src)),
 def : Pat<(f32 (X86frcp (load addr:$src))),
           (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX, OptForSize]>;
+}
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX], AddedComplexity = 1 in {
   def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
             (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
                            (VSQRTSSr (f32 (IMPLICIT_DEF)),
@@ -3127,21 +3263,26 @@ let Predicates = [HasAVX] in {
 }
 
 // Square root.
-defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss>,
-             sse1_fp_unop_p<0x51, "sqrt",  fsqrt>,
-             sse1_fp_unop_p_int<0x51, "sqrt",  int_x86_sse_sqrt_ps>,
-             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd>,
-             sse2_fp_unop_p<0x51, "sqrt",  fsqrt>,
-             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
+                            SSE_SQRTS>,
+             sse1_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
+             sse1_fp_unop_p_int<0x51, "sqrt",  int_x86_sse_sqrt_ps, SSE_SQRTS>,
+             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd,
+                            SSE_SQRTS>,
+             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
+             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
 
 // Reciprocal approximations. Note that these typically require refinement
 // in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
-             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
-             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
-defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
-             sse1_fp_unop_p<0x53, "rcp", X86frcp>,
-             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+                            SSE_SQRTS>,
+             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+                                SSE_SQRTS>;
+defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+                            SSE_RCPS>,
+             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
+             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
 
 // There is no f64 version of the reciprocal approximation instructions.
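The new let AddedComplexity = 1 wrappers around the scalar AVX sqrt/rsqrt/rcp patterns deserve a note: AddedComplexity biases instruction selection toward a pattern when several patterns cover the same DAG node, so the intent here is presumably that these VEX-encoded scalar forms win over the equally specific non-VEX patterns whenever AVX is available. The mechanism itself is generic; as a sketch (the Pat below is taken from the hunk above, only the wrapper placement is illustrative):

    // Higher AddedComplexity = tried earlier by the pattern matcher.
    let AddedComplexity = 1 in
    def : Pat<(f32 (fsqrt FR32:$src)),
              (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;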
@@ -3154,24 +3295,22 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
                            (ins f128mem:$dst, VR128:$src),
                            "movntps\t{$src, $dst|$dst, $src}",
                            [(alignednontemporalstore (v4f32 VR128:$src),
-                                                     addr:$dst)]>, VEX;
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
   def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
                            (ins f128mem:$dst, VR128:$src),
                            "movntpd\t{$src, $dst|$dst, $src}",
                            [(alignednontemporalstore (v2f64 VR128:$src),
-                                                     addr:$dst)]>, VEX;
-  def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
-                           (ins f128mem:$dst, VR128:$src),
-                           "movntdq\t{$src, $dst|$dst, $src}",
-                           [(alignednontemporalstore (v2f64 VR128:$src),
-                                                     addr:$dst)]>, VEX;
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
 
   let ExeDomain = SSEPackedInt in
   def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
                            (ins f128mem:$dst, VR128:$src),
                            "movntdq\t{$src, $dst|$dst, $src}",
-                           [(alignednontemporalstore (v4f32 VR128:$src),
-                                                     addr:$dst)]>, VEX;
+                           [(alignednontemporalstore (v2i64 VR128:$src),
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
 
   def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
             (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
@@ -3180,23 +3319,21 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
                            (ins f256mem:$dst, VR256:$src),
                            "movntps\t{$src, $dst|$dst, $src}",
                            [(alignednontemporalstore (v8f32 VR256:$src),
-                                                     addr:$dst)]>, VEX;
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
   def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
                            (ins f256mem:$dst, VR256:$src),
                            "movntpd\t{$src, $dst|$dst, $src}",
                            [(alignednontemporalstore (v4f64 VR256:$src),
-                                                     addr:$dst)]>, VEX;
-  def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
-                           (ins f256mem:$dst, VR256:$src),
-                           "movntdq\t{$src, $dst|$dst, $src}",
-                           [(alignednontemporalstore (v4f64 VR256:$src),
-                                                     addr:$dst)]>, VEX;
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
   let ExeDomain = SSEPackedInt in
   def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
                            (ins f256mem:$dst, VR256:$src),
                            "movntdq\t{$src, $dst|$dst, $src}",
-                           [(alignednontemporalstore (v8f32 VR256:$src),
-                                                     addr:$dst)]>, VEX;
+                           [(alignednontemporalstore (v4i64 VR256:$src),
+                                                     addr:$dst)],
+                                                     IIC_SSE_MOVNT>, VEX;
 }
 
 def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -3209,19 +3346,18 @@ def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
 let AddedComplexity = 400 in { // Prefer non-temporal versions
 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
+                    IIC_SSE_MOVNT>;
 def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntpd\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                       "movntdq\t{$src, $dst|$dst, $src}",
-                       [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
+                    IIC_SSE_MOVNT>;
 
 let ExeDomain = SSEPackedInt in
 def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntdq\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
+                    IIC_SSE_MOVNT>;
 
 def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
           (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
@@ -3229,11 +3365,13 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
 
 // There is no AVX form for instructions below this point
 def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "movnti{l}\t{$src, $dst|$dst, $src}",
-                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+                 [(nontemporalstore (i32 GR32:$src), addr:$dst)],
+                 IIC_SSE_MOVNT>,
                TB, Requires<[HasSSE2]>;
 def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      "movnti{q}\t{$src, $dst|$dst, $src}",
-                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+                     [(nontemporalstore (i64 GR64:$src), addr:$dst)],
+                     IIC_SSE_MOVNT>,
                   TB, Requires<[HasSSE2]>;
 }
 
@@ -3242,31 +3380,40 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
 //===----------------------------------------------------------------------===//
 
 // Prefetch intrinsic.
-def PREFETCHT0   : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
-    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1   : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
-    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2   : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
-    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA  : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
-    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+let Predicates = [HasSSE1] in {
+def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
+    IIC_SSE_PREFETCH>, TB;
+def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
+    IIC_SSE_PREFETCH>, TB;
+def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
+    IIC_SSE_PREFETCH>, TB;
+def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
+    IIC_SSE_PREFETCH>, TB;
+}
 
 // Flush cache
 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
-               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
-              TB, Requires<[HasSSE2]>;
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
+               IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
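All the IIC_SSE_* classes being attached throughout this patch only change anything once a processor itinerary maps them onto pipeline resources; on targets whose itineraries leave them unmapped, scheduling behaves exactly as before. A toy sketch of how a scheduling model might bind one of these classes (the functional-unit names and stage counts are invented for the example, not taken from any real X86 schedule file):

    def ToyPort0 : FuncUnit;
    def ToyPort1 : FuncUnit;
    def ToyItineraries : ProcessorItineraries<[ToyPort0, ToyPort1], [], [
      // One-cycle issue of non-temporal stores on the store port.
      InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [ToyPort0]>]>
    ]>;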
 // Pause. This "instruction" is encoded as "rep; nop", so even though it
 // was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;
 
 // Load, store, and memory fence
 def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
-               "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+               "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
+               TB, Requires<[HasSSE1]>;
 def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
-               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+               "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
+               TB, Requires<[HasSSE2]>;
 def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
-               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+               "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
+               TB, Requires<[HasSSE2]>;
 
 def : Pat<(X86SFence), (SFENCE)>;
 def : Pat<(X86LFence), (LFENCE)>;
@@ -3277,14 +3424,18 @@ def : Pat<(X86MFence), (MFENCE)>;
 //===----------------------------------------------------------------------===//
 
 def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+                  IIC_SSE_LDMXCSR>, VEX;
 def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+                  IIC_SSE_STMXCSR>, VEX;
 
 def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+                  IIC_SSE_LDMXCSR>;
 def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+                  IIC_SSE_STMXCSR>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 - Move Aligned/Unaligned Packed Integer Instructions
 //===---------------------------------------------------------------------===//
@@ -3294,108 +3445,134 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
 let neverHasSideEffects = 1 in {
 def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+                      VEX;
 def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+                      VEX;
 }
 def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+                      VEX;
 def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
-                      "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+                      VEX;
 
 // For Disassembler
 let isCodeGenOnly = 1 in {
 def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                          "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movdqa\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVA_P_RR>,
+                          VEX;
 def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
-                          "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movdqa\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVA_P_RR>,
+                          VEX;
 def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                          "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movdqu\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVU_P_RR>,
+                          VEX;
 def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
-                          "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+                          "movdqu\t{$src, $dst|$dst, $src}", [],
+                          IIC_SSE_MOVU_P_RR>,
+                          VEX;
 }
 
 let canFoldAsLoad = 1, mayLoad = 1 in {
 def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+                      VEX;
 def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+                      VEX;
 let Predicates = [HasAVX] in {
   def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
-                     "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+                     XS, VEX;
   def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
-                     "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+                     XS, VEX;
 }
 }
 
 let mayStore = 1 in {
 def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+                      VEX;
 def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
-                      "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+                      VEX;
 let Predicates = [HasAVX] in {
 def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+                   XS, VEX;
 def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
-                   "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+                   XS, VEX;
 }
 }
 
 let neverHasSideEffects = 1 in
 def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                   "movdqa\t{$src, $dst|$dst, $src}", []>;
+                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
 
 def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
-                   []>, XS, Requires<[HasSSE2]>;
+                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
 
 // For Disassembler
 let isCodeGenOnly = 1 in {
 def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
-                       "movdqa\t{$src, $dst|$dst, $src}", []>;
+                       "movdqa\t{$src, $dst|$dst, $src}", [],
+                       IIC_SSE_MOVA_P_RR>;
 
 def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
-                       []>, XS, Requires<[HasSSE2]>;
+                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
 }
 
 let canFoldAsLoad = 1, mayLoad = 1 in {
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
-                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
+                   IIC_SSE_MOVA_P_RM>;
 def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
-                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
+                   IIC_SSE_MOVU_P_RM>,
                  XS, Requires<[HasSSE2]>;
 }
 
 let mayStore = 1 in {
 def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
-                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
+                   IIC_SSE_MOVA_P_MR>;
 def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                    "movdqu\t{$src, $dst|$dst, $src}",
-                   [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+                   [/*(store (v2i64 VR128:$src), addr:$dst)*/],
+                   IIC_SSE_MOVU_P_MR>,
                  XS, Requires<[HasSSE2]>;
 }
 
 // Intrinsic forms of MOVDQU load and store
 def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                       "vmovdqu\t{$src, $dst|$dst, $src}",
-                      [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+                      [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+                      IIC_SSE_MOVU_P_MR>,
                       XS, VEX, Requires<[HasAVX]>;
 def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                        "movdqu\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+                       IIC_SSE_MOVU_P_MR>,
                        XS, Requires<[HasSSE2]>;
 
 } // ExeDomain = SSEPackedInt
 
 let Predicates = [HasAVX] in {
-  def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
   def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
             (VMOVDQUYmr addr:$dst, VR256:$src)>;
 }
@@ -3404,11 +3581,17 @@ let Predicates = [HasAVX] in {
 // SSE2 - Packed Integer Arithmetic Instructions
 //===---------------------------------------------------------------------===//
 
+def SSE_PMADD : OpndItins<
+  IIC_SSE_PMADD, IIC_SSE_PMADD
+>;
+
 let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                             RegisterClass RC, PatFrag memop_frag,
-                            X86MemOperand x86memop, bit IsCommutable = 0,
+                            X86MemOperand x86memop,
+                            OpndItins itins,
+                            bit IsCommutable = 0,
                             bit Is2Addr = 1> {
   let isCommutable = IsCommutable in
   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
@@ -3416,58 +3599,64 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>;
+       [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>;
+       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
+       itins.rm>;
 }
 
-multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
-                             string OpcodeStr, Intrinsic IntId,
-                             Intrinsic IntId2, RegisterClass RC,
-                             bit Is2Addr = 1> {
+multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
+                         string OpcodeStr, SDNode OpNode,
+                         SDNode OpNode2, RegisterClass RC,
+                         ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
+                         ShiftOpndItins itins,
+                         bit Is2Addr = 1> {
   // src2 is always 128-bit
   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src1, VR128:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>;
+       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
+       itins.rr>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, i128mem:$src2),
        !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memopv2i64 addr:$src2))))]>;
+       [(set RC:$dst, (DstVT (OpNode RC:$src1,
+                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
   def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
                  (ins RC:$src1, i32i8imm:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>;
+       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
 }
 
-/// PDI_binop_rm - Simple SSE2 binary operator.
-multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
-                        X86MemOperand x86memop, bit IsCommutable = 0,
-                        bit Is2Addr = 1> {
+/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+                         PatFrag memop_frag, X86MemOperand x86memop,
+                         OpndItins itins,
+                         bit IsCommutable = 0, bit Is2Addr = 1> {
   let isCommutable = IsCommutable in
   def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-      [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
+      [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
   def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-      [(set RC:$dst, (OpVT (OpNode RC:$src1,
+      [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                     (bitconvert (memop_frag addr:$src2)))))]>;
 }
 } // ExeDomain = SSEPackedInt
@@ -3476,185 +3665,242 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
 let Predicates = [HasAVX] in {
 defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
-                            i128mem, 1, 0 /*3addr*/>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
+                            VEX_4V;
 defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
-                            i128mem, 1, 0>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
-                            i128mem, 1, 0>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDQ  : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
-                            i128mem, 1, 0>, VEX_4V;
+                            i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
-                            i128mem, 1, 0>, VEX_4V;
+                            i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPSUBB  : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
-                            i128mem, 0, 0>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBW  : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
-                            i128mem, 0, 0>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBD  : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
-                            i128mem, 0, 0>, VEX_4V;
+                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBQ  : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
-                            i128mem, 0, 0>, VEX_4V;
+                            i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
+                              memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+                              VEX_4V;
 
 // Intrinsic forms
 defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
-                                 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
-                                 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
-                                 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
-                                 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDSW  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULHW  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
-defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_PMADD, 1, 0>, VEX_4V;
 defm VPAVGB   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPAVGW   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMINUB  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMINSW  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMAXUB  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMAXSW  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPSADBW  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
-                                 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
+                                 VR128, memopv2i64, i128mem,
+                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 }
 
 let Predicates = [HasAVX2] in {
 defm VPADDBY  : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
-                             i256mem, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDWY  : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
-                             i256mem, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDDY  : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
-                             i256mem, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDQY  : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
-                             i256mem, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
-                             i256mem, 1, 0>, VEX_4V;
+                             i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPSUBBY  : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
-                             i256mem, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBWY  : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
-                             i256mem, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBDY  : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
-                             i256mem, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBQY  : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
-                             i256mem, 0, 0>, VEX_4V;
+                             i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
+                               VR256, memopv4i64, i256mem,
+                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 
 // Intrinsic forms
 defm VPSUBSBY  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
-                                  VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBSWY  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
-                                  VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
-                                  VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
-                                  VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
 defm VPADDSBY  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDSWY  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPMULHWY  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
-defm VPMULUDQY : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_avx2_pmulu_dq,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_PMADD, 1, 0>, VEX_4V;
 defm VPAVGBY   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPAVGWY   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMINUBY  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMINSWY  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMAXUBY  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPMAXSWY  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 defm VPSADBWY  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
-                                  VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
+                                  VR256, memopv4i64, i256mem,
+                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
 }
 
 let Constraints = "$src1 = $dst" in {
 defm PADDB  : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
-                           i128mem, 1>;
+                           i128mem, SSE_INTALU_ITINS_P, 1>;
 defm PADDW  : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
-                           i128mem, 1>;
+                           i128mem, SSE_INTALU_ITINS_P, 1>;
 defm PADDD  : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
-                           i128mem, 1>;
+                           i128mem, SSE_INTALU_ITINS_P, 1>;
 defm PADDQ  : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
-                           i128mem, 1>;
+                           i128mem, SSE_INTALUQ_ITINS_P, 1>;
 defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
-                           i128mem, 1>;
+                           i128mem, SSE_INTMUL_ITINS_P, 1>;
 defm PSUBB  : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
-                           i128mem>;
+                           i128mem, SSE_INTALU_ITINS_P>;
 defm PSUBW  : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
-                           i128mem>;
+                           i128mem, SSE_INTALU_ITINS_P>;
 defm PSUBD  : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
-                           i128mem>;
+                           i128mem, SSE_INTALU_ITINS_P>;
 defm PSUBQ  : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
-                           i128mem>;
+                           i128mem, SSE_INTALUQ_ITINS_P>;
+defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
+                             memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
 
 // Intrinsic forms
 defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
-                                VR128, memopv2i64, i128mem>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P>;
 defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
-                                VR128, memopv2i64, i128mem>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P>;
 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
-                                VR128, memopv2i64, i128mem>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P>;
 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
-                                VR128, memopv2i64, i128mem>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P>;
 defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTMUL_ITINS_P, 1>;
 defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
-                                VR128, memopv2i64, i128mem, 1>;
-defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTMUL_ITINS_P, 1>;
 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_PMADD, 1>;
 defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
-                                VR128, memopv2i64, i128mem, 1>;
+                                VR128, memopv2i64, i128mem,
+                                SSE_INTALU_ITINS_P, 1>;
 } // Constraints = "$src1 = $dst"
 
@@ -3663,159 +3909,138 @@ defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
 //===---------------------------------------------------------------------===//
 
 let Predicates = [HasAVX] in {
-defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
-                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
-                                VR128, 0>, VEX_4V;
-defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
-                                int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
-                                VR128, 0>, VEX_4V;
-defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
-                                int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
-                                VR128, 0>, VEX_4V;
-
-defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
-                                int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
-                                VR128, 0>, VEX_4V;
-defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
-                                int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
-                                VR128, 0>, VEX_4V;
-defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
-                                int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
-                                VR128, 0>, VEX_4V;
-
-defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
-                                int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
-                                VR128, 0>, VEX_4V;
-defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
-                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
-                                VR128, 0>, VEX_4V;
-
-defm VPAND  : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
-                           i128mem, 1, 0>, VEX_4V;
-defm VPOR   : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
-                           i128mem, 1, 0>, VEX_4V;
-defm VPXOR  : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
-                           i128mem, 1, 0>, VEX_4V;
-defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
-                           i128mem, 0, 0>, VEX_4V;
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+                            VR128, v8i16, v8i16, bc_v8i16,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+                            VR128, v4i32, v4i32, bc_v4i32,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+                            VR128, v2i64, v2i64, bc_v2i64,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+                            VR128, v8i16, v8i16, bc_v8i16,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+                            VR128, v4i32, v4i32, bc_v4i32,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+                            VR128, v2i64, v2i64, bc_v2i64,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+                            VR128, v8i16, v8i16, bc_v8i16,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+                            VR128, v4i32, v4i32, bc_v4i32,
+                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
 
 let ExeDomain = SSEPackedInt in {
-  let neverHasSideEffects = 1 in {
-    // 128-bit logical shifts.
-    def VPSLLDQri : PDIi8<0x73, MRM7r,
-                          (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                          "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                          VEX_4V;
-    def VPSRLDQri : PDIi8<0x73, MRM3r,
-                          (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                          "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                          VEX_4V;
-    // PSRADQri doesn't exist in SSE[1-3].
-  }
-}
+  // 128-bit logical shifts.
+  def VPSLLDQri : PDIi8<0x73, MRM7r,
+                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                        "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
+                        VEX_4V;
+  def VPSRLDQri : PDIi8<0x73, MRM3r,
+                        (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                        "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set VR128:$dst,
+                          (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
+                        VEX_4V;
+  // PSRADQri doesn't exist in SSE[1-3].
 }
+} // Predicates = [HasAVX]
 
 let Predicates = [HasAVX2] in {
-defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
-                                 int_x86_avx2_psll_w, int_x86_avx2_pslli_w,
-                                 VR256, 0>, VEX_4V;
-defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
-                                 int_x86_avx2_psll_d, int_x86_avx2_pslli_d,
-                                 VR256, 0>, VEX_4V;
-defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
-                                 int_x86_avx2_psll_q, int_x86_avx2_pslli_q,
-                                 VR256, 0>, VEX_4V;
-
-defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
-                                 int_x86_avx2_psrl_w, int_x86_avx2_psrli_w,
-                                 VR256, 0>, VEX_4V;
-defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
-                                 int_x86_avx2_psrl_d, int_x86_avx2_psrli_d,
-                                 VR256, 0>, VEX_4V;
-defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
-                                 int_x86_avx2_psrl_q, int_x86_avx2_psrli_q,
-                                 VR256, 0>, VEX_4V;
-
-defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
-                                 int_x86_avx2_psra_w, int_x86_avx2_psrai_w,
-                                 VR256, 0>, VEX_4V;
-defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
-                                 int_x86_avx2_psra_d, int_x86_avx2_psrai_d,
-                                 VR256, 0>, VEX_4V;
-
-defm VPANDY  : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
-                            i256mem, 1, 0>, VEX_4V;
-defm VPORY   : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
-                            i256mem, 1, 0>, VEX_4V;
-defm VPXORY  : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
-                            i256mem, 1, 0>, VEX_4V;
-defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
-                            i256mem, 0, 0>, VEX_4V;
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+                             VR256, v16i16, v8i16, bc_v8i16,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+                             VR256, v8i32, v4i32, bc_v4i32,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+                             VR256, v4i64, v2i64, bc_v2i64,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+                             VR256, v16i16, v8i16, bc_v8i16,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+                             VR256, v8i32, v4i32, bc_v4i32,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+                             VR256, v4i64, v2i64, bc_v2i64,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+                             VR256, v16i16, v8i16, bc_v8i16,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+                             VR256, v8i32, v4i32, bc_v4i32,
+                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
 
 let ExeDomain = SSEPackedInt in {
-  let neverHasSideEffects = 1 in {
-    // 128-bit logical shifts.
-    def VPSLLDQYri : PDIi8<0x73, MRM7r,
-                           (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
-                           "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                           VEX_4V;
-    def VPSRLDQYri : PDIi8<0x73, MRM3r,
-                           (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
-                           "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-                           VEX_4V;
-    // PSRADQYri doesn't exist in SSE[1-3].
-  }
-}
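The structural change running through these shift hunks: the old byte-shift definitions were pattern-less (guarded by neverHasSideEffects) and relied on standalone Pat entries to connect the _dq_bs intrinsics, whereas the replacements carry those intrinsic patterns on the instruction definitions themselves. The indirection being retired looked like this (the 128-bit flavor of it is deleted further down in this same patch):

    // No longer needed once the pattern lives on VPSLLDQri itself.
    def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
              (VPSLLDQri VR128:$src1, imm:$src2)>;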
+  // 256-bit logical shifts.
+  def VPSLLDQYri : PDIi8<0x73, MRM7r,
+                         (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+                         "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         [(set VR256:$dst,
+                           (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
+                         VEX_4V;
+  def VPSRLDQYri : PDIi8<0x73, MRM3r,
+                         (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+                         "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         [(set VR256:$dst,
+                           (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
+                         VEX_4V;
+  // PSRADQYri doesn't exist in SSE[1-3].
 }
+} // Predicates = [HasAVX2]
 
 let Constraints = "$src1 = $dst" in {
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
-                               int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
-                               VR128>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
-                               int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
-                               VR128>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
-                               int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
-                               VR128>;
-
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
-                               int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
-                               VR128>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
-                               int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
-                               VR128>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
-                               int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
-                               VR128>;
-
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
-                               int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
-                               VR128>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
-                               int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
-                               VR128>;
-
-defm PAND  : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
-                          i128mem, 1>;
-defm POR   : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
-                          i128mem, 1>;
-defm PXOR  : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
-                          i128mem, 1>;
-defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
-                          i128mem, 0>;
+defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+                           VR128, v8i16, v8i16, bc_v8i16,
+                           SSE_INTSHIFT_ITINS_P>;
+defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+                           VR128, v4i32, v4i32, bc_v4i32,
+                           SSE_INTSHIFT_ITINS_P>;
+defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+                           VR128, v2i64, v2i64, bc_v2i64,
+                           SSE_INTSHIFT_ITINS_P>;
+
+defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+                           VR128, v8i16, v8i16, bc_v8i16,
+                           SSE_INTSHIFT_ITINS_P>;
+defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+                           VR128, v4i32, v4i32, bc_v4i32,
+                           SSE_INTSHIFT_ITINS_P>;
+defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+                           VR128, v2i64, v2i64, bc_v2i64,
+                           SSE_INTSHIFT_ITINS_P>;
+
+defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+                           VR128, v8i16, v8i16, bc_v8i16,
+                           SSE_INTSHIFT_ITINS_P>;
+defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+                           VR128, v4i32, v4i32, bc_v4i32,
+                           SSE_INTSHIFT_ITINS_P>;
 
 let ExeDomain = SSEPackedInt in {
-  let neverHasSideEffects = 1 in {
-    // 128-bit logical shifts.
-    def PSLLDQri : PDIi8<0x73, MRM7r,
-                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                         "pslldq\t{$src2, $dst|$dst, $src2}", []>;
-    def PSRLDQri : PDIi8<0x73, MRM3r,
-                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                         "psrldq\t{$src2, $dst|$dst, $src2}", []>;
-    // PSRADQri doesn't exist in SSE[1-3].
-  }
+  // 128-bit logical shifts.
+  def PSLLDQri : PDIi8<0x73, MRM7r,
+                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                       "pslldq\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+  def PSRLDQri : PDIi8<0x73, MRM3r,
+                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                       "psrldq\t{$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+  // PSRADQri doesn't exist in SSE[1-3].
 }
 } // Constraints = "$src1 = $dst"
 
@@ -3824,17 +4049,13 @@ let Predicates = [HasAVX] in {
             (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
   def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
             (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-  def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
-            (VPSLLDQri VR128:$src1, imm:$src2)>;
-  def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
-            (VPSRLDQri VR128:$src1, imm:$src2)>;
   def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
             (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
 
   // Shift up / down and insert zero's.
-  def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+  def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
            (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
-  def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+  def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
            (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
 }
 
@@ -3843,10 +4064,6 @@ let Predicates = [HasAVX2] in {
             (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
   def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
             (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
-  def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
-            (VPSLLDQYri VR256:$src1, imm:$src2)>;
-  def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
-            (VPSRLDQYri VR256:$src1, imm:$src2)>;
 }
 
 let Predicates = [HasSSE2] in {
@@ -3854,17 +4071,13 @@ let Predicates = [HasSSE2] in {
             (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
   def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
             (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
-  def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
-            (PSLLDQri VR128:$src1, imm:$src2)>;
-  def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
-            (PSRLDQri VR128:$src1, imm:$src2)>;
   def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
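Two immediate conventions are in play in the surviving Pats above: the psll_dq/psrl_dq intrinsics count bits, while the PSLLDQ/PSRLDQ encodings take a byte count, so the patterns route the immediate through BYTE_imm. BYTE_imm is defined elsewhere in the X86 backend; to be consistent with its use here it has to divide the value by eight, roughly (assumed definition, shown for orientation only):

    // SDNodeXForm turning a bit count into the byte count the
    // instruction encodes.
    def BYTE_imm : SDNodeXForm<imm, [{
      return getI32Imm(N->getZExtValue() >> 3);  // value / 8
    }]>;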
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), + def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; - def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), + def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; } @@ -3873,177 +4086,106 @@ let Predicates = [HasSSE2] in { //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { - defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, - VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; - defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; - - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), - (VPCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (VPCMPEQBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), - (VPCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPCMPEQWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), - (VPCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPCMPEQDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), - (VPCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (VPCMPGTBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), - (VPCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPCMPGTWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), - (VPCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPCMPGTDrm VR128:$src1, addr:$src2)>; + defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPCMPEQBY : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_avx2_pcmpeq_b, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm 
VPCMPEQWY : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_avx2_pcmpeq_w, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm VPCMPEQDY : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_avx2_pcmpeq_d, - VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; - defm VPCMPGTBY : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_avx2_pcmpgt_b, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - defm VPCMPGTWY : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_avx2_pcmpgt_w, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; - - def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)), - (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)))), - (VPCMPEQBYrm VR256:$src1, addr:$src2)>; - def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)), - (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)))), - (VPCMPEQWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)), - (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)))), - (VPCMPEQDYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)), - (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)))), - (VPCMPGTBYrm VR256:$src1, addr:$src2)>; - def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)), - (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)))), - (VPCMPGTWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)), - (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)))), - (VPCMPGTDYrm VR256:$src1, addr:$src2)>; + defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; + defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; + defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, - VR128, memopv2i64, i128mem, 1>; - defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, - VR128, memopv2i64, i128mem, 1>; - defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, - VR128, memopv2i64, i128mem, 1>; - defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b, - VR128, memopv2i64, i128mem>; - defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w, - VR128, memopv2i64, i128mem>; - defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d, - VR128, memopv2i64, i128mem>; + defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8, + VR128, 
memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1>; + defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1>; + defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 1>; + defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; + defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; + defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32, + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; } // Constraints = "$src1 = $dst" -let Predicates = [HasSSE2] in { - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), - (PCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PCMPEQBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), - (PCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PCMPEQWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), - (PCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PCMPEQDrm VR128:$src1, addr:$src2)>; - - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), - (PCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2)))), - (PCMPGTBrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), - (PCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2)))), - (PCMPGTWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), - (PCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))), - (PCMPGTDrm VR128:$src1, addr:$src2)>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; } let Predicates = [HasAVX2] in { defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, - VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + VR256, memopv4i64, i256mem, + SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PACKSSWB : PDI_binop_rm_int<0x63, 
"packsswb", int_x86_sse2_packsswb_128, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem>; + VR128, memopv2i64, i128mem, + SSE_INTALU_ITINS_P>; } // Constraints = "$src1 = $dst" //===---------------------------------------------------------------------===// @@ -4051,134 +4193,75 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, //===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { -multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, - PatFrag bc_frag> { +multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> { def ri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1, - (undef))))]>; + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>; def mi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (vt (pshuf_frag:$src2 - (bc_frag (memopv2i64 addr:$src1)), - (undef))))]>; + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt (OpNode (bitconvert (memopv2i64 addr:$src1)), + (i8 imm:$src2))))], + IIC_SSE_PSHUF>; } -multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, - PatFrag bc_frag> { +multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> { def Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1, - (undef))))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>; def Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (pshuf_frag:$src2 - (bc_frag (memopv4i64 addr:$src1)), - (undef))))]>; + [(set VR256:$dst, + (vt (OpNode (bitconvert (memopv4i64 addr:$src1)), + (i8 imm:$src2))))]>; } } // ExeDomain = SSEPackedInt let Predicates = [HasAVX] in { - let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize, - VEX; - - // SSE2 with ImmT == Imm8 and XS prefix. - defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS, - VEX; - - // SSE2 with ImmT == Imm8 and XD prefix. - defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, - VEX; - - let AddedComplexity = 5 in - def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; - // Unary v4f32 shuffle with VPSHUF* in order to fold a load. 
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; - - def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), - (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>; - def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), - (VPSHUFHWri VR128:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), - (i8 imm:$imm))), - (VPSHUFHWmi addr:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), - (VPSHUFLWri VR128:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), - (i8 imm:$imm))), - (VPSHUFLWmi addr:$src, imm:$imm)>; -} + let AddedComplexity = 5 in + defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX; -let Predicates = [HasAVX2] in { - let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB, - OpSize, VEX; + // SSE2 with ImmT == Imm8 and XS prefix. + defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX; - // SSE2 with ImmT == Imm8 and XS prefix. - defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS, - VEX; + // SSE2 with ImmT == Imm8 and XD prefix. + defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX; + + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; +} - // SSE2 with ImmT == Imm8 and XD prefix. - defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD, - VEX; +let Predicates = [HasAVX2] in { + defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX; + defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX; + defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX; } let Predicates = [HasSSE2] in { - let AddedComplexity = 5 in - defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; - - // SSE2 with ImmT == Imm8 and XS prefix. - defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS; - - // SSE2 with ImmT == Imm8 and XD prefix. - defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; - - let AddedComplexity = 5 in - def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), - (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; - // Unary v4f32 shuffle with PSHUF* in order to fold a load. 
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), - (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>; - - def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)), - (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; - def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), - (PSHUFHWri VR128:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), - (i8 imm:$imm))), - (PSHUFHWmi addr:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), - (PSHUFLWri VR128:$src, imm:$imm)>; - def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), - (i8 imm:$imm))), - (PSHUFLWmi addr:$src, imm:$imm)>; + let AddedComplexity = 5 in + defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; + + // SSE2 with ImmT == Imm8 and XS prefix. + defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS; + + // SSE2 with ImmT == Imm8 and XD prefix. + defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD; + + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -4193,7 +4276,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !if(Is2Addr, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>; + [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], + IIC_SSE_UNPCK>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4201,7 +4285,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 - addr:$src2))))]>; + addr:$src2))))], + IIC_SSE_UNPCK>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4300,14 +4385,6 @@ let Predicates = [HasAVX] in { (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; } -// Splat v2f64 / v2i64 -let AddedComplexity = 10 in { - def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), - (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; - def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), - (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>; -} - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Extract and Insert //===---------------------------------------------------------------------===// @@ -4321,7 +4398,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4330,7 +4407,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, 
$src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))]>; + imm:$src3))], IIC_SSE_PINSRW>; } // Extract @@ -4344,7 +4421,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>; + imm:$src2))], IIC_SSE_PEXTRW>; // Insert let Predicates = [HasAVX] in { @@ -4368,9 +4445,10 @@ let ExeDomain = SSEPackedInt in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], + IIC_SSE_MOVMSK>, VEX; def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; + "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX; let Predicates = [HasAVX2] in { def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), @@ -4382,7 +4460,8 @@ def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], + IIC_SSE_MOVMSK>; } // ExeDomain = SSEPackedInt @@ -4396,21 +4475,25 @@ let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX; + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], + IIC_SSE_MASKMOV>, VEX; let Uses = [RDI] in def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], + IIC_SSE_MASKMOV>, VEX; let Uses = [EDI] in def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], + IIC_SSE_MASKMOV>; let Uses = [RDI] in def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", - [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], + IIC_SSE_MASKMOV>; } // ExeDomain = SSEPackedInt @@ -4424,54 +4507,65 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))]>, VEX; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + VEX; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))], + IIC_SSE_MOVDQ>, VEX; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (scalar_to_vector GR64:$src)))]>, VEX; + (v2i64 (scalar_to_vector GR64:$src)))], + IIC_SSE_MOVDQ>, VEX; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, 
(outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX; + [(set FR64:$dst, (bitconvert GR64:$src))], + IIC_SSE_MOVDQ>, VEX; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))]>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; + (v4i32 (scalar_to_vector (loadi32 addr:$src))))], + IIC_SSE_MOVDQ>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (scalar_to_vector GR64:$src)))]>; + (v2i64 (scalar_to_vector GR64:$src)))], + IIC_SSE_MOVDQ>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))]>; + [(set FR64:$dst, (bitconvert GR64:$src))], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar // def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; + [(set FR32:$dst, (bitconvert GR32:$src))], + IIC_SSE_MOVDQ>, VEX; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], + IIC_SSE_MOVDQ>, VEX; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (bitconvert GR32:$src))]>; + [(set FR32:$dst, (bitconvert GR32:$src))], + IIC_SSE_MOVDQ>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4479,20 +4573,22 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))]>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), - (iPTR 0))), addr:$dst)]>, VEX; + (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, + VEX; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))]>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), - (iPTR 0))), addr:$dst)]>; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int @@ -4500,13 +4596,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, 
VR128:$src), def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>, + (iPTR 0)))], + IIC_SSE_MOVD_ToGP>, TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>; + (iPTR 0)))], + IIC_SSE_MOVD_ToGP>; //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4518,36 +4616,45 @@ def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), VEX; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; + [(set GR64:$dst, (bitconvert FR64:$src))], + IIC_SSE_MOVDQ>, VEX; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + [(store (i64 (bitconvert FR64:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, VEX; def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], + IIC_SSE_MOVDQ>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; + [(set GR64:$dst, (bitconvert FR64:$src))], + IIC_SSE_MOVD_ToGP>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + [(store (i64 (bitconvert FR64:$src)), addr:$dst)], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int // def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; + [(set GR32:$dst, (bitconvert FR32:$src))], + IIC_SSE_MOVD_ToGP>, VEX; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; + [(store (i32 (bitconvert FR32:$src)), addr:$dst)], + IIC_SSE_MOVDQ>, VEX; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (bitconvert FR32:$src))]>; + [(set GR32:$dst, (bitconvert FR32:$src))], + IIC_SSE_MOVD_ToGP>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; + [(store (i32 (bitconvert FR32:$src)), addr:$dst)], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends @@ -4556,23 +4663,26 @@ let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector GR32:$src)))))]>, - VEX; + (v4i32 (scalar_to_vector GR32:$src)))))], + IIC_SSE_MOVDQ>, VEX; def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 
only [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))]>, + (v2i64 (scalar_to_vector GR64:$src)))))], + IIC_SSE_MOVDQ>, VEX, VEX_W; } let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector GR32:$src)))))]>; + (v4i32 (scalar_to_vector GR32:$src)))))], + IIC_SSE_MOVDQ>; def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))]>; + (v2i64 (scalar_to_vector GR64:$src)))))], + IIC_SSE_MOVDQ>; } let AddedComplexity = 20 in { @@ -4580,29 +4690,19 @@ def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector - (loadi32 addr:$src))))))]>, - VEX; + (loadi32 addr:$src))))))], + IIC_SSE_MOVDQ>, VEX; def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector - (loadi32 addr:$src))))))]>; -} - -let Predicates = [HasSSE2], AddedComplexity = 20 in { - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), - (MOVZDI2PDIrm addr:$src)>; + (loadi32 addr:$src))))))], + IIC_SSE_MOVDQ>; } let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. let AddedComplexity = 20 in { - def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))), - (VMOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (VMOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), @@ -4617,6 +4717,13 @@ let Predicates = [HasAVX] in { (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } +let Predicates = [HasSSE2], AddedComplexity = 20 in { + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; + def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), + (MOVZDI2PDIrm addr:$src)>; +} + // These are the correct encodings of the instructions so that we know how to // read correct assembly, even though we continue to emit the wrong ones for // compatibility with Darwin's buggy assembler. 
@@ -4648,7 +4755,8 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))], + IIC_SSE_MOVDQ>, XS, Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix //===---------------------------------------------------------------------===// @@ -4657,11 +4765,13 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))), addr:$dst)]>, VEX; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOVDQ>, VEX; def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))), addr:$dst)]>; + (iPTR 0))), addr:$dst)], + IIC_SSE_MOVDQ>; //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. @@ -4671,14 +4781,16 @@ def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], + IIC_SSE_MOVDQ>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector - (loadi64 addr:$src))))))]>, + (loadi64 addr:$src))))))], + IIC_SSE_MOVDQ>, XS, VEX, Requires<[HasAVX]>; let AddedComplexity = 20 in @@ -4686,9 +4798,19 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector - (loadi64 addr:$src))))))]>, + (loadi64 addr:$src))))))], + IIC_SSE_MOVDQ>, XS, Requires<[HasSSE2]>; +let Predicates = [HasAVX], AddedComplexity = 20 in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), + (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload addr:$src)), + (VMOVZQI2PQIrm addr:$src)>; +} + let Predicates = [HasSSE2], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; @@ -4697,13 +4819,9 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; } -let Predicates = [HasAVX], AddedComplexity = 20 in { - def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), - (VMOVZQI2PQIrm addr:$src)>; - def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), - (VMOVZQI2PQIrm addr:$src)>; - def : Pat<(v2i64 (X86vzload addr:$src)), - (VMOVZQI2PQIrm addr:$src)>; +let Predicates = [HasAVX] in { +def : Pat<(v4i64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; } //===---------------------------------------------------------------------===// @@ -4713,51 +4831,58 @@ let Predicates = [HasAVX], AddedComplexity = 20 in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, 
$src}", - [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, + [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], + IIC_SSE_MOVQ_RR>, XS, VEX, Requires<[HasAVX]>; let AddedComplexity = 15 in def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, + [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], + IIC_SSE_MOVQ_RR>, XS, Requires<[HasSSE2]>; let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl - (loadv2i64 addr:$src))))]>, + (loadv2i64 addr:$src))))], + IIC_SSE_MOVDQ>, XS, VEX, Requires<[HasAVX]>; let AddedComplexity = 20 in { def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl - (loadv2i64 addr:$src))))]>, + (loadv2i64 addr:$src))))], + IIC_SSE_MOVDQ>, XS, Requires<[HasSSE2]>; } let AddedComplexity = 20 in { - let Predicates = [HasSSE2] in { - def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), - (MOVZPQILo2PQIrm addr:$src)>; - def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), - (MOVZPQILo2PQIrr VR128:$src)>; - } let Predicates = [HasAVX] in { - def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (VMOVZPQILo2PQIrm addr:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (VMOVZPQILo2PQIrr VR128:$src)>; } + let Predicates = [HasSSE2] in { + def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), + (MOVZPQILo2PQIrm addr:$src)>; + def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), + (MOVZPQILo2PQIrr VR128:$src)>; + } } // Instructions to match in the assembler def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; + "movq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVDQ>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; + "movq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVDQ>, VEX, VEX_W; // Recognize "movd" with GR64 destination, but encode as a "movq" def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; + "movd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_MOVDQ>, VEX, VEX_W; // Instructions for the disassembler // xr = XMM register @@ -4767,7 +4892,7 @@ let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movq\t{$src, $dst|$dst, $src}", []>, XS; + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; //===---------------------------------------------------------------------===// // SSE3 - Conversion Instructions @@ -4797,14 +4922,16 @@ def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), } def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; + "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), - (VCVTPD2DQYrr 
VR256:$src)>; + (VCVTTPD2DQYrr VR256:$src)>; def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), - (VCVTPD2DQYrm addr:$src)>; + (VCVTTPD2DQYrm addr:$src)>; // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { @@ -4819,9 +4946,11 @@ def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), } def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RM>; // AVX 256-bit register conversion intrinsics def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), @@ -4847,10 +4976,12 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, X86MemOperand x86memop> { def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (vt (OpNode RC:$src)))]>; + [(set RC:$dst, (vt (OpNode RC:$src)))], + IIC_SSE_MOV_LH>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>; + [(set RC:$dst, (OpNode (mem_frag addr:$src)))], + IIC_SSE_MOV_LH>; } let Predicates = [HasAVX] in { @@ -4868,17 +4999,6 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, memopv4f32, f128mem>; -let Predicates = [HasSSE3] in { - def : Pat<(v4i32 (X86Movshdup VR128:$src)), - (MOVSHDUPrr VR128:$src)>; - def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), - (MOVSHDUPrm addr:$src)>; - def : Pat<(v4i32 (X86Movsldup VR128:$src)), - (MOVSLDUPrr VR128:$src)>; - def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), - (MOVSLDUPrm addr:$src)>; -} - let Predicates = [HasAVX] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), (VMOVSHDUPrr VR128:$src)>; @@ -4898,82 +5018,60 @@ let Predicates = [HasAVX] in { (VMOVSLDUPYrm addr:$src)>; } +let Predicates = [HasSSE3] in { + def : Pat<(v4i32 (X86Movshdup VR128:$src)), + (MOVSHDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSHDUPrm addr:$src)>; + def : Pat<(v4i32 (X86Movsldup VR128:$src)), + (MOVSLDUPrr VR128:$src)>; + def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), + (MOVSLDUPrm addr:$src)>; +} + //===---------------------------------------------------------------------===// // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// multiclass sse3_replicate_dfp<string OpcodeStr> { +let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; + [], IIC_SSE_MOV_LH>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, - (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), - (undef))))]>; + (v2f64 (X86Movddup + (scalar_to_vector (loadf64 addr:$src)))))], + IIC_SSE_MOV_LH>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { +def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, + (v4f64 (X86Movddup + (scalar_to_vector (loadf64 addr:$src)))))]>; +} + let Predicates = [HasAVX] in { - def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>; - def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>; - } + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; } defm MOVDDUP : sse3_replicate_dfp<"movddup">; -defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; -defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; - -let Predicates = [HasSSE3] in { - def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), - (MOVDDUPrm addr:$src)>; - let AddedComplexity = 5 in { - def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), - (MOVDDUPrm addr:$src)>; - } - def : Pat<(X86Movddup (memopv2f64 addr:$src)), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (bc_v2f64 - (v2i64 (scalar_to_vector (loadi64 addr:$src))))), - (MOVDDUPrm addr:$src)>; -} let Predicates = [HasAVX] in { - def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), - (undef)), - (VMOVDDUPrm addr:$src)>; - let AddedComplexity = 5 in { - def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)), - (VMOVDDUPrm addr:$src)>; - def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>; - def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), - (VMOVDDUPrm addr:$src)>; - } def : Pat<(X86Movddup (memopv2f64 addr:$src)), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; - def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; @@ -4983,16 +5081,24 @@ let Predicates = [HasAVX] in { (VMOVDDUPYrm addr:$src)>; def : Pat<(X86Movddup (memopv4i64 addr:$src)), (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))), - (VMOVDDUPYrm addr:$src)>; def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4f64 VR256:$src)), - (VMOVDDUPYrr VR256:$src)>; def : Pat<(X86Movddup (v4i64 VR256:$src)), (VMOVDDUPYrr VR256:$src)>; } +let Predicates = [HasSSE3] in { + def : Pat<(X86Movddup (memopv2f64 addr:$src)), + 
(MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), + (MOVDDUPrm addr:$src)>; + def : Pat<(X86Movddup (bc_v2f64 + (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (MOVDDUPrm addr:$src)>; +} + //===---------------------------------------------------------------------===// // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// @@ -5007,49 +5113,51 @@ let Predicates = [HasAVX] in { } def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], + IIC_SSE_LDDQU>; //===---------------------------------------------------------------------===// // SSE3 - Arithmetic //===---------------------------------------------------------------------===// multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit Is2Addr = 1> { def rr : I<0xD0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))]>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; } let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, - f128mem, 0>, TB, XD, VEX_4V; + f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V; defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, 0>, TB, XD, VEX_4V; + f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V; } let ExeDomain = SSEPackedDouble in { defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, - f128mem, 0>, TB, OpSize, VEX_4V; + f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, 0>, TB, OpSize, VEX_4V; + f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V; } } let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, - f128mem>, TB, XD; + f128mem, SSE_ALU_F32P>, TB, XD; let ExeDomain = SSEPackedDouble in defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, - f128mem>, TB, OpSize; + f128mem, SSE_ALU_F64P>, TB, OpSize; } //===---------------------------------------------------------------------===// @@ -5063,13 +5171,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], + IIC_SSE_HADDSUB_RM>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -5077,13 +5186,14 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>; + [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], + IIC_SSE_HADDSUB_RM>; } let Predicates = [HasAVX] in { @@ -5131,7 +5241,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, + [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, OpSize; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), @@ -5139,7 +5249,8 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv2i64 addr:$src))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, + OpSize; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -5188,9 +5299,52 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +def SSE_PHADDSUBD : OpndItins< + IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM +>; +def SSE_PHADDSUBSW : OpndItins< + IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM +>; +def SSE_PHADDSUBW : OpndItins< + IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM +>; +def SSE_PSHUFB : OpndItins< + IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM +>; +def SSE_PSIGN : OpndItins< + IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM +>; +def SSE_PMULHRSW : OpndItins< + IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW +>; + +/// SS3I_binop_rm - Simple SSSE3 bin op +multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, OpndItins itins, + bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + OpSize; + def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; +} + /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { + Intrinsic IntId128, OpndItins itins, + bit Is2Addr = 1> { let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -5227,57 +5381,77 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", - int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", - int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem, + SSE_PHADDSUBD, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem, + SSE_PHADDSUBD, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128, + memopv2i64, i128mem, + SSE_PSIGN, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128, + memopv2i64, i128mem, + SSE_PSIGN, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128, + memopv2i64, i128mem, + SSE_PSIGN, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem, + SSE_PSHUFB, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", - int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", - int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", - int_x86_ssse3_phsub_d_128, 0>, VEX_4V; + int_x86_ssse3_phadd_sw_128, + SSE_PHADDSUBSW, 0>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", - int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; + int_x86_ssse3_phsub_sw_128, + SSE_PHADDSUBSW, 0>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", - int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", - int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", - int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", - int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", - int_x86_ssse3_psign_d_128, 0>, VEX_4V; + int_x86_ssse3_pmadd_ub_sw_128, + SSE_PMADD, 0>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", - int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; + int_x86_ssse3_pmul_hr_sw_128, + SSE_PMULHRSW, 0>, VEX_4V; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", - int_x86_avx2_phadd_w>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", - int_x86_avx2_phadd_d>, VEX_4V; + defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPSIGNBY : 
SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; + defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256, + memopv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", - int_x86_avx2_phsub_w>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", - int_x86_avx2_phsub_d>, VEX_4V; defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", int_x86_avx2_pmadd_ub_sw>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", - int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", - int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", - int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", - int_x86_avx2_psign_d>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", int_x86_avx2_pmul_hr_sw>, VEX_4V; @@ -5286,95 +5460,34 @@ defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", - int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", - int_x86_ssse3_phadd_d_128>; + defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, + memopv2i64, i128mem, SSE_PHADDSUBW>; + defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, + memopv2i64, i128mem, SSE_PHADDSUBD>; + defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, + memopv2i64, i128mem, SSE_PHADDSUBW>; + defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, + memopv2i64, i128mem, SSE_PHADDSUBD>; + defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128, + memopv2i64, i128mem, SSE_PSIGN>; + defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128, + memopv2i64, i128mem, SSE_PSIGN>; + defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128, + memopv2i64, i128mem, SSE_PSIGN>; + defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, + memopv2i64, i128mem, SSE_PSHUFB>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", - int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", - int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", - int_x86_ssse3_phsub_d_128>; + int_x86_ssse3_phadd_sw_128, + SSE_PHADDSUBSW>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", - int_x86_ssse3_phsub_sw_128>; + int_x86_ssse3_phsub_sw_128, + SSE_PHADDSUBSW>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", - int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", - int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", - int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", - int_x86_ssse3_psign_d_128>; + int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>; } defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", - int_x86_ssse3_pmul_hr_sw_128>; -} - -let 
Predicates = [HasSSSE3] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (PSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (PHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (PHSUBDrr128 VR128:$src1, VR128:$src2)>; -} - -let Predicates = [HasAVX] in { - def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (VPSHUFBrr128 VR128:$src, VR128:$mask)>; - def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (VPSHUFBrm128 VR128:$src, addr:$mask)>; - - def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), - (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; - - def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), - (VPHADDDrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), - (VPHSUBDrr128 VR128:$src1, VR128:$src2)>; -} - -let Predicates = [HasAVX2] in { - def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNBrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), - (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; - - def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)), - (VPHADDDrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBWrr256 VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)), - (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; + int_x86_ssse3_pmul_hr_sw_128, + SSE_PMULHRSW>; } //===---------------------------------------------------------------------===// @@ -5389,7 +5502,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5397,7 +5510,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - []>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize; } } @@ -5424,15 +5537,15 @@ let Predicates = [HasAVX2] in let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : 
ssse3_palign<"palignr">; -let Predicates = [HasSSSE3] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +let Predicates = [HasAVX2] in { +def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; +def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; +def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; +def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; } let Predicates = [HasAVX] in { @@ -5446,23 +5559,36 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } +let Predicates = [HasSSSE3] in { +def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), + (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), - [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; + [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, + Requires<[HasSSE3]>; def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; + [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, + Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in -def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, - Requires<[HasSSE3]>; +def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, + TB, Requires<[HasSSE3]>; let Uses = [ECX, EAX] in -def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, - Requires<[HasSSE3]>; +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>, + TB, Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; @@ -5536,70 +5662,80 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>; -let Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load. 
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>; + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (PMOVSXBWrm addr:$src)>; + (VPMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>; + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (PMOVSXWDrm addr:$src)>; + (VPMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>; + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (PMOVSXDQrm addr:$src)>; + (VPMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>; + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (PMOVZXBWrm addr:$src)>; + (VPMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>; + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (PMOVZXWDrm addr:$src)>; + (VPMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>; + (VPMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (PMOVZXDQrm addr:$src)>; + (VPMOVZXDQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load. def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), - (VPMOVSXBWrm addr:$src)>; + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), - (VPMOVSXBWrm addr:$src)>; + (PMOVSXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), - (VPMOVSXWDrm addr:$src)>; + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), - (VPMOVSXWDrm addr:$src)>; + (PMOVSXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), - (VPMOVSXDQrm addr:$src)>; + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), - (VPMOVSXDQrm addr:$src)>; + (PMOVSXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), - (VPMOVZXBWrm addr:$src)>; + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), - (VPMOVZXBWrm addr:$src)>; + (PMOVZXBWrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), - (VPMOVZXWDrm addr:$src)>; + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), - (VPMOVZXWDrm addr:$src)>; + (PMOVZXWDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), - (VPMOVZXDQrm addr:$src)>; + (PMOVZXDQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), - (VPMOVZXDQrm addr:$src)>; + (PMOVZXDQrm addr:$src)>; +} + +let Predicates = [HasAVX] in { +def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; +def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; +} + +let Predicates = [HasSSE41] in { +def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; +def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; } @@ -5655,30 +5791,30 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; -let 
Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (PMOVSXBDrm addr:$src)>; + (VPMOVSXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (PMOVSXWQrm addr:$src)>; + (VPMOVSXWQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (PMOVZXBDrm addr:$src)>; + (VPMOVZXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (PMOVZXWQrm addr:$src)>; + (VPMOVZXWQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), - (VPMOVSXBDrm addr:$src)>; + (PMOVSXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), - (VPMOVSXWQrm addr:$src)>; + (PMOVSXWQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), - (VPMOVZXBDrm addr:$src)>; + (PMOVZXBDrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), - (VPMOVZXWQrm addr:$src)>; + (PMOVZXWQrm addr:$src)>; } multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { @@ -5723,30 +5859,30 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -let Predicates = [HasSSE41] in { +let Predicates = [HasAVX] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVSXBQrm addr:$src)>; + (VPMOVSXBQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (PMOVZXBQrm addr:$src)>; + (VPMOVZXBQrm addr:$src)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasSSE41] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (VPMOVSXBQrm addr:$src)>; + (PMOVSXBQrm addr:$src)>; def : Pat<(int_x86_sse41_pmovzxbq (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), - (VPMOVZXBQrm addr:$src)>; + (PMOVZXBQrm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -5876,13 +6012,13 @@ let ExeDomain = SSEPackedSingle in { def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), - (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; + (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasAVX]>; def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))), addr:$dst), - (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasAVX]>; + (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasSSE41]>; //===----------------------------------------------------------------------===// // SSE4.1 - Insert Instructions @@ -5992,19 +6128,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { } let ExeDomain = SSEPackedSingle in { - let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; + let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; } -def : 
Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, - Requires<[HasAVX]>; -def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, - Requires<[HasSSE41]>; - //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// @@ -6347,8 +6476,6 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, 0>, VEX_4V; - defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, - 0>, VEX_4V; defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, 0>, VEX_4V; defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, @@ -6367,19 +6494,12 @@ let Predicates = [HasAVX] in { 0>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 0>, VEX_4V; - - def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (VPCMPEQQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (VPCMPEQQrm VR128:$src1, addr:$src2)>; } let Predicates = [HasAVX2] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", int_x86_avx2_packusdw>, VEX_4V; - defm VPCMPEQQ : SS41I_binop_rm_int_y<0x29, "vpcmpeqq", - int_x86_avx2_pcmpeq_q>, VEX_4V; defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", int_x86_avx2_pmins_b>, VEX_4V; defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", @@ -6398,17 +6518,11 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pmaxu_w>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", int_x86_avx2_pmul_dq>, VEX_4V; - - def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, VR256:$src2)), - (VPCMPEQQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, (memop addr:$src2))), - (VPCMPEQQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; @@ -6420,57 +6534,46 @@ let Constraints = "$src1 = $dst" in { defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; } -let Predicates = [HasSSE41] in { - def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; -} - /// SS48I_binop_rm - Simple SSE41 binary operator. 
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Is2Addr = 1> { + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { let isCommutable = 1 in - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), + def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, - OpSize; - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpNode VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>, - OpSize; + [(set RC:$dst, + (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))]>, OpSize; } -/// SS48I_binop_rm - Simple SSE41 binary operator. -multiclass SS48I_binop_rm_y<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT> { - let isCommutable = 1 in - def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (OpVT (OpNode VR256:$src1, VR256:$src2)))]>, - OpSize; - def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (OpNode VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2))))]>, - OpSize; +let Predicates = [HasAVX] in { + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, + memopv2i64, i128mem, 0>, VEX_4V; +} +let Predicates = [HasAVX2] in { + defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V; + defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, + memopv4i64, i256mem, 0>, VEX_4V; } -let Predicates = [HasAVX] in - defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; -let Predicates = [HasAVX2] in - defm VPMULLD : SS48I_binop_rm_y<0x40, "vpmulld", mul, v8i32>, VEX_4V; -let Constraints = "$src1 = $dst" in - defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; +let Constraints = "$src1 = $dst" in { + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, + memopv2i64, i128mem>; + defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, + memopv2i64, i128mem>; +} /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, @@ -6568,7 +6671,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), @@ -6577,7 +6680,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert 
(mem_frag addr:$src2)), RC:$src3))], - SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { @@ -6705,69 +6808,37 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), // SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// -/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Is2Addr = 1> { - def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), +/// SS42I_binop_rm - Simple SSE 4.2 binary operator +multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit Is2Addr = 1> { + def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; - def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), + def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, - (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; + [(set RC:$dst, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize; } -/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr, - Intrinsic IntId256> { - def Yrr : SS428I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, - OpSize; - def Yrm : SS428I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize; -} - -let Predicates = [HasAVX] in { - defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, - 0>, VEX_4V; - - def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), - (VPCMPGTQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), - (VPCMPGTQrm VR128:$src1, addr:$src2)>; -} - -let Predicates = [HasAVX2] in { - defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>, - VEX_4V; +let Predicates = [HasAVX] in + defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, + memopv2i64, i128mem, 0>, VEX_4V; - def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, VR256:$src2)), - (VPCMPGTQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, (memop addr:$src2))), - (VPCMPGTQYrm VR256:$src1, addr:$src2)>; -} +let Predicates = [HasAVX2] in + defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, + memopv4i64, i256mem, 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; - -let Predicates = [HasSSE42] in { - def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), - (PCMPGTQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, 
(memop addr:$src2))), - (PCMPGTQrm VR128:$src1, addr:$src2)>; -} + defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, + memopv2i64, i128mem>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions @@ -6786,8 +6857,9 @@ multiclass pseudo_pcmpistrm<string asm> { } let Defs = [EFLAGS], usesCustomInserter = 1 in { + let AddedComplexity = 1 in + defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>; - defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { @@ -6823,8 +6895,9 @@ multiclass pseudo_pcmpestrm<string asm> { } let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + let AddedComplexity = 1 in + defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>; - defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } let Predicates = [HasAVX], @@ -7017,8 +7090,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } // Perform One Round of an AES Encryption/Decryption Flow @@ -7044,44 +7116,6 @@ let Constraints = "$src1 = $dst" in { int_x86_aesni_aesdeclast>; } -let Predicates = [HasAES] in { - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (AESENCrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (AESENCrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (AESENCLASTrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (AESENCLASTrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (AESDECrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (AESDECrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (AESDECLASTrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (AESDECLASTrm VR128:$src1, addr:$src2)>; -} - -let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in { - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (VAESENCrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (VAESENCrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (VAESENCLASTrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (VAESENCLASTrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (VAESDECrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (VAESDECrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (VAESDECLASTrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 
(int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (VAESDECLASTrm VR128:$src1, addr:$src2)>; -} - // Perform the AES InvMixColumn Transformation let Predicates = [HasAVX, HasAES] in { def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), @@ -7093,8 +7127,7 @@ let Predicates = [HasAVX, HasAES] in { def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", - [(set VR128:$dst, - (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, + [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, OpSize, VEX; } def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), @@ -7106,8 +7139,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", - [(set VR128:$dst, - (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, + [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, OpSize; // AES Round Key Generation Assist @@ -7122,8 +7154,7 @@ let Predicates = [HasAVX, HasAES] in { (ins i128mem:$src1, i8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), - imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, OpSize, VEX; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), @@ -7136,8 +7167,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), - imm:$src2))]>, + (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, OpSize; //===----------------------------------------------------------------------===// @@ -7146,31 +7176,31 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // Carry-less Multiplication instructions let neverHasSideEffects = 1 in { -let Constraints = "$src1 = $dst" in { -def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +// AVX carry-less Multiplication instructions +def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; let mayLoad = 1 in -def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", + "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; -} -// AVX carry-less Multiplication instructions -def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +let Constraints = "$src1 = $dst" in { +def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; let mayLoad = 1 in -def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; -} +} 
// Constraints = "$src1 = $dst" +} // neverHasSideEffects = 1 multiclass pclmul_alias<string asm, int immop> { @@ -7242,6 +7272,7 @@ let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, int_x86_avx2_vbroadcasti128>; +let Predicates = [HasAVX] in def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -7261,13 +7292,6 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, VEX_4V; } -def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), - (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; - //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // @@ -7283,12 +7307,14 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), []>, VEX; } +let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +} //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores @@ -7334,8 +7360,8 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", // multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, - X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag, - Intrinsic IntVar, Intrinsic IntImm> { + X86MemOperand x86memop_i, PatFrag i_frag, + Intrinsic IntVar, ValueType vt> { def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7349,83 +7375,91 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX; + [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX; def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), (ins x86memop_f:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; + [(set RC:$dst, + (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX; } let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv2i64, - int_x86_avx_vpermilvar_ps, - int_x86_avx_vpermil_ps>; + memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv4i64, - int_x86_avx_vpermilvar_ps_256, - int_x86_avx_vpermil_ps_256>; + memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - memopv2f64, memopv2i64, - int_x86_avx_vpermilvar_pd, - int_x86_avx_vpermil_pd>; + memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>; 
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - memopv4f64, memopv4i64, - int_x86_avx_vpermilvar_pd_256, - int_x86_avx_vpermil_pd_256>; + memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>; } -def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), - (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), - (VPERMILPDYri VR256:$src1, imm:$imm)>; +let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))), - (VPERMILPSYmi addr:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))), - (VPERMILPDYmi addr:$src1, imm:$imm)>; def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)), (i8 imm:$imm))), (VPERMILPSYmi addr:$src1, imm:$imm)>; def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), (VPERMILPDYmi addr:$src1, imm:$imm)>; +def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))), + (VPERMILPDri VR128:$src1, imm:$imm)>; +def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))), + (VPERMILPDmi addr:$src1, imm:$imm)>; +} + //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { +let ExeDomain = SSEPackedSingle in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; -let mayLoad = 1 in + [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, + (i8 imm:$src3))))]>, VEX_4V; def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, VEX_4V; + [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2), + (i8 imm:$src3)))]>, VEX_4V; } -def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; +let Predicates = [HasAVX] in { +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(int_x86_avx_vperm2f128_ps_256 - VR256:$src1, (memopv8f32 addr:$src2), imm:$src3), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_pd_256 - VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), - (VPERM2F128rm VR256:$src1, 
addr:$src2, imm:$src3)>; -def : Pat<(int_x86_avx_vperm2f128_si_256 - VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +} //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers @@ -7564,6 +7598,7 @@ let Predicates = [HasAVX2] in { } // AVX1 broadcast patterns +let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), @@ -7577,6 +7612,7 @@ def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSrm addr:$src)>; +} //===----------------------------------------------------------------------===// // VPERM - Permute instructions @@ -7626,25 +7662,22 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, //===----------------------------------------------------------------------===// // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // +let AddedComplexity = 1 in { def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, - VEX_4V; + [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, + (i8 imm:$src3))))]>, VEX_4V; def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR256:$dst, - (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), - imm:$src3))]>, - VEX_4V; + [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), + (i8 imm:$src3)))]>, VEX_4V; +} -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2], AddedComplexity = 1 in { def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), @@ -7659,44 +7692,8 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, 
addr:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), - (i8 imm:$imm))), - (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; } -// AVX1 patterns -def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; - -def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, - (memopv4i64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; - //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values @@ -7734,6 +7731,7 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), } // AVX1 patterns +let Predicates = [HasAVX] in { def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, @@ -7758,6 +7756,7 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; +} //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values @@ -7793,6 +7792,7 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), } // AVX1 patterns +let Predicates = [HasAVX] in { def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (v4f32 (VEXTRACTF128rr (v8f32 VR256:$src1), @@ -7817,6 +7817,7 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (v16i8 (VEXTRACTF128rr (v32i8 VR256:$src1), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +} //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td new file mode 100644 index 0000000..757dcd0 --- /dev/null +++ b/lib/Target/X86/X86InstrSVM.td @@ -0,0 +1,62 @@ +//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the AMD SVM instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVM instructions
+
+// 0F 01 D9
+def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
+
+// 0F 01 DC
+def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
+
+// 0F 01 DD
+def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
+
+// 0F 01 DE
+let Uses = [EAX] in
+def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|EAX}", []>, TB;
+
+// 0F 01 D8
+let Uses = [EAX] in
+def VMRUN32 : I<0x01, MRM_D8, (outs), (ins),
+              "vmrun\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMRUN64 : I<0x01, MRM_D8, (outs), (ins),
+              "vmrun\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DA
+let Uses = [EAX] in
+def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins),
+              "vmload\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins),
+              "vmload\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DB
+let Uses = [EAX] in
+def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins),
+              "vmsave\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins),
+              "vmsave\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DF
+let Uses = [EAX, ECX] in
+def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
+              "invlpga\t{%ecx, %eax|EAX, ECX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX, ECX] in
+def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
+              "invlpga\t{%ecx, %rax|RAX, ECX}", []>, TB, Requires<[In64BitMode]>;
+
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 58cf6e3..bdeb63f 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -1,10 +1,10 @@
-//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
-//
+//===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
+//
 // The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-//
+//
 //===----------------------------------------------------------------------===//
 //
 // This file describes the shift and rotate instructions.
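// [editor's sketch -- not part of this commit] The hunks that follow are
// mechanical: every shift/rotate def gains an instruction itinerary class
// operand (IIC_SR) so that subtarget scheduling models can assign latencies
// to these instructions. The shape of the change for one representative def:
//
//   before: def SHL8rCL : I<0xD2, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
//                           "shl{b}\t{%cl, $dst|$dst, CL}",
//                           [(set GR8:$dst, (shl GR8:$src1, CL))]>;
//   after:  the same def, with the pattern list followed by IIC_SR>;
//
// The 64-bit forms also get their Intel-syntax operand spelling fixed from
// %CL to CL in the assembly strings.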
@@ -19,44 +19,46 @@ let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src1, CL))]>; + [(set GR8:$dst, (shl GR8:$src1, CL))], IIC_SR>; def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (shl GR16:$src1, CL))], IIC_SR>, OpSize; def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), "shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src1, CL))]>; + [(set GR32:$dst, (shl GR32:$src1, CL))], IIC_SR>; def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src1, CL))]>; + "shl{q}\t{%cl, $dst|$dst, CL}", + [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>; } // Uses = [CL] def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, + OpSize; def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>; def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // NOTE: We don't include patterns for shifts of a register by one, because // 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). 
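// [editor's note] The shift-by-one Pat alluded to in the NOTE above is of
// the following form (a sketch from memory; it lives further down in the
// X86 .td files, not in this hunk):
//
//   // (shl x, 1) ==> (add x, x)
//   def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
//   def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
//
// which is why SHL8r1 and friends below keep empty pattern lists.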
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), - "shl{b}\t$dst", []>; + "shl{b}\t$dst", [], IIC_SR>; def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t$dst", []>, OpSize; + "shl{w}\t$dst", [], IIC_SR>, OpSize; def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t$dst", []>; + "shl{l}\t$dst", [], IIC_SR>; def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t$dst", []>; + "shl{q}\t$dst", [], IIC_SR>; } // isConvertibleToThreeAddress = 1 } // Constraints = "$src = $dst" @@ -66,223 +68,266 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), "shl{w}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (shl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, + OpSize; def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; + "shl{q}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), "shl{b}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), "shl{w}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), "shl{l}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), "shl{q}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t$dst", - [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), "shl{w}\t$dst", - [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t$dst", - [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t$dst", - [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl 
GR8:$src1, CL))]>; + [(set GR8:$dst, (srl GR8:$src1, CL))], IIC_SR>; def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, CL))], IIC_SR>, OpSize; def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src1, CL))]>; + [(set GR32:$dst, (srl GR32:$src1, CL))], IIC_SR>; def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (srl GR64:$src1, CL))]>; + "shr{q}\t{%cl, $dst|$dst, CL}", + [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>; } def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shr{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shr{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shr{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; // Shift right by 1 def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), "shr{b}\t$dst", - [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (srl GR8:$src1, (i8 1)))], IIC_SR>; def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t$dst", - [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (srl GR16:$src1, (i8 1)))], IIC_SR>, OpSize; def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t$dst", - [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (srl GR32:$src1, (i8 1)))], IIC_SR>; def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", - [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (srl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), "shr{w}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, + [(store (srl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>, OpSize; def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>; def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; + "shr{q}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), "shr{b}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl 
(loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), "shr{w}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), "shr{l}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), "shr{q}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t$dst", - [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), "shr{w}\t$dst", - [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; + [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>,OpSize; def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t$dst", - [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst", - [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src1, CL))]>; + [(set GR8:$dst, (sra GR8:$src1, CL))], + IIC_SR>; def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (sra GR16:$src1, CL))], + IIC_SR>, OpSize; def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src1, CL))]>; + [(set GR32:$dst, (sra GR32:$src1, CL))], + IIC_SR>; def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (sra GR64:$src1, CL))]>; + "sar{q}\t{%cl, $dst|$dst, CL}", + [(set GR64:$dst, (sra GR64:$src1, CL))], + IIC_SR>; } def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))], + IIC_SR>; def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "sar{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "sar{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "sar{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Shift by 1 def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins 
GR8 :$src1), "sar{b}\t$dst", - [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (sra GR8:$src1, (i8 1)))], + IIC_SR>; def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t$dst", - [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (sra GR16:$src1, (i8 1)))], + IIC_SR>, OpSize; def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t$dst", - [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (sra GR32:$src1, (i8 1)))], + IIC_SR>; def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", - [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (sra GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), "sar{w}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (sra (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t{%cl, $dst|$dst, CL}", - [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; + "sar{q}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), "sar{b}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), "sar{w}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), "sar{l}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), "sar{q}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Shift by 1 def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t$dst", - [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), "sar{w}\t$dst", - [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t$dst", - [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t$dst", - [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; //===----------------------------------------------------------------------===// // 
Rotate instructions @@ -290,125 +335,125 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let Constraints = "$src1 = $dst" in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t$dst", []>; + "rcl{b}\t$dst", [], IIC_SR>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t$dst", []>, OpSize; + "rcl{w}\t$dst", [], IIC_SR>, OpSize; def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; let Uses = [CL] in def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t$dst", []>; + "rcl{l}\t$dst", [], IIC_SR>; def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), - "rcl{q}\t$dst", []>; + "rcl{q}\t$dst", [], IIC_SR>; def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), - "rcr{b}\t$dst", []>; + "rcr{b}\t$dst", [], IIC_SR>; def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t$dst", []>, OpSize; + "rcr{w}\t$dst", [], IIC_SR>, OpSize; def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; let Uses = [CL] in def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "rcr{l}\t$dst", []>; + "rcr{l}\t$dst", [], IIC_SR>; def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), - "rcr{q}\t$dst", []>; + "rcr{q}\t$dst", 
[], IIC_SR>; def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } // Constraints = "$src = $dst" def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t$dst", []>; + "rcl{b}\t$dst", [], IIC_SR>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t$dst", []>, OpSize; + "rcl{w}\t$dst", [], IIC_SR>, OpSize; def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t$dst", []>; + "rcl{l}\t$dst", [], IIC_SR>; def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t$dst", []>; + "rcl{q}\t$dst", [], IIC_SR>; def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t$dst", []>; + "rcr{b}\t$dst", [], IIC_SR>; def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t$dst", []>, OpSize; + "rcr{w}\t$dst", [], IIC_SR>, OpSize; def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize; def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t$dst", []>; + "rcr{l}\t$dst", [], IIC_SR>; def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), - "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t$dst", []>; + "rcr{q}\t$dst", [], IIC_SR>; def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in { def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize; def RCR32mCL : 
I<0xD3, MRM3m, (outs), (ins i32mem:$dst), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } let Constraints = "$src1 = $dst" in { @@ -416,179 +461,217 @@ let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotl GR8:$src1, CL))]>; + [(set GR8:$dst, (rotl GR8:$src1, CL))], IIC_SR>; def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (rotl GR16:$src1, CL))], IIC_SR>, OpSize; def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotl GR32:$src1, CL))]>; + [(set GR32:$dst, (rotl GR32:$src1, CL))], IIC_SR>; def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotl GR64:$src1, CL))]>; + "rol{q}\t{%cl, $dst|$dst, CL}", + [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>; } def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "rol{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Rotate by 1 def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t$dst", - [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))], + IIC_SR>; def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t$dst", - [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))], + IIC_SR>, OpSize; def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t$dst", - [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))], + IIC_SR>; def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", - [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), "rol{w}\t{%cl, $dst|$dst, CL}", - [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t{%cl, $dst|$dst, 
CL}", - [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; + "rol{q}\t{%cl, $dst|$dst, %cl}", + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1), "rol{b}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), "rol{w}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>, + [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>, OpSize; def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), "rol{l}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), "rol{q}\t{$src1, $dst|$dst, $src1}", - [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)], + IIC_SR>; // Rotate by 1 def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t$dst", - [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), "rol{w}\t$dst", - [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t$dst", - [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", - [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotr GR8:$src1, CL))]>; + [(set GR8:$dst, (rotr GR8:$src1, CL))], IIC_SR>; def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; + [(set GR16:$dst, (rotr GR16:$src1, CL))], IIC_SR>, OpSize; def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotr GR32:$src1, CL))]>; + [(set GR32:$dst, (rotr GR32:$src1, CL))], IIC_SR>; def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src1, CL))]>; + "ror{q}\t{%cl, $dst|$dst, CL}", + [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>; } def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; + [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>; def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "ror{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, + [(set GR16:$dst, 
(rotr GR16:$src1, (i8 imm:$src2)))], + IIC_SR>, OpSize; def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))], + IIC_SR>; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))], + IIC_SR>; // Rotate by 1 def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t$dst", - [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; + [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))], + IIC_SR>; def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t$dst", - [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize; + [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))], + IIC_SR>, OpSize; def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t$dst", - [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; + [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))], + IIC_SR>; def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", - [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))], + IIC_SR>; } // Constraints = "$src = $dst" let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), "ror{w}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; + [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)], + IIC_SR>, OpSize; def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t{%cl, $dst|$dst, CL}", - [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; + [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)], + IIC_SR>; def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; + "ror{q}\t{%cl, $dst|$dst, CL}", + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)], + IIC_SR>; } def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), "ror{b}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), "ror{w}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>, OpSize; def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), "ror{l}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), "ror{q}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], + IIC_SR>; // Rotate by 1 def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t$dst", - [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), 
"ror{w}\t$dst", - [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>, OpSize; def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t$dst", - [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t$dst", - [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)], + IIC_SR>; //===----------------------------------------------------------------------===// @@ -601,30 +684,36 @@ let Uses = [CL] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))], + IIC_SHD16_REG_CL>, TB, OpSize; def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))], + IIC_SHD16_REG_CL>, TB, OpSize; def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))], + IIC_SHD32_REG_CL>, TB; def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", - [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))], + IIC_SHD32_REG_CL>, TB; def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))], + IIC_SHD64_REG_CL>, TB; def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))], + IIC_SHD64_REG_CL>, TB; } @@ -634,42 +723,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], 
IIC_SHD32_REG_IM>, TB; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, + (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; } } // Constraints = "$src = $dst" @@ -678,68 +767,74 @@ let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; + addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize; def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), - addr:$dst)]>, TB, OpSize; + addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize; def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD32_MEM_CL>, TB; def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD32_MEM_CL>, TB; def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD64_MEM_CL>, TB; def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; + addr:$dst)], IIC_SHD64_MEM_CL>, TB; } def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD16_MEM_IM>, TB, OpSize; def SHRD16mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD16_MEM_IM>, TB, OpSize; def SHLD32mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD32_MEM_IM>, TB; def SHRD32mri8 : Ii8<0xAC, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD32_MEM_IM>, TB; def SHLD64mri8 : RIi8<0xA4, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi64 addr:$dst), GR64:$src2, - (i8 
imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD64_MEM_IM>, TB; def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, + (i8 imm:$src3)), addr:$dst)], + IIC_SHD64_MEM_IM>, TB; } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 1b43838..8843848 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -1,10 +1,10 @@ -//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===// -// +//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 instructions that are generally used in @@ -214,18 +214,18 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB, OpSize; + "str{w}\t$dst", []>, TB, OpSize; def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), - "str{l}\t{$dst}", []>, TB; + "str{l}\t$dst", []>, TB; def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), - "str{q}\t{$dst}", []>, TB; + "str{q}\t$dst", []>, TB; def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; + "str{w}\t$dst", []>, TB; def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), - "ltr{w}\t{$src}", []>, TB; + "ltr{w}\t$src", []>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), - "ltr{w}\t{$src}", []>, TB; + "ltr{w}\t$src", []>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize; diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 74477cd..6a8f0c8 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -1,10 +1,10 @@ -//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===// -// +//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the instructions that make up the Intel VMX instruction @@ -33,6 +33,8 @@ def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; +// 0F 01 D4 +def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB; // 0F 01 C2 def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 @@ -60,5 +62,5 @@ def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), - "vmxon\t{$vmxon}", []>, XS; + "vmxon\t$vmxon", []>, XS; diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 64cc44d..65bbcb5 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -1,4 +1,4 @@ -//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,90 +11,123 @@ // //===----------------------------------------------------------------------===// -multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { +multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; - def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), + [(set VR128:$dst, (Int VR128:$src))]>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; + [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX; } let isAsmParserOnly = 1 in { - defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>; - defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>; - defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>; - defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>; - defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>; - defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>; - defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>; - defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>; - defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>; - defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>; - defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>; - defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>; - defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>; - defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>; - defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>; - defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; - defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>; - defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>; - defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>; -} - -multiclass xop2op256<bits<8> opc, string OpcodeStr> { + defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>; + defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>; + defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>; + defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>; + defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>; + defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>; + 
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>; + defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>; + defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>; + defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>; + defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>; + defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>; + defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>; + defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>; + defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>; + defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>; + defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>; +} + +// Scalar load 2 addr operand instructions +let Constraints = "$src1 = $dst" in { +multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int, + Operand memop, ComplexPattern mem_cpat> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, + VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, + memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (Int VR128:$src1, + (bitconvert mem_cpat:$src2)))]>, VEX; +} + +} // Constraints = "$src1 = $dst" + +let isAsmParserOnly = 1 in { + defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss, + ssmem, sse_load_f32>; + defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd, + sdmem, sse_load_f64>; +} + + +multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int, + PatFrag memop> { def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX, VEX_L; + [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L; def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - []>, VEX; + [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX; } let isAsmParserOnly = 1 in { - defm VFRCZPS : xop2op256<0x80, "vfrczps">; - defm VFRCZPD : xop2op256<0x81, "vfrczpd">; + defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, + memopv8f32>; + defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, + memopv4f64>; } -multiclass xop3op<bits<8> opc, string OpcodeStr> { +multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4VOp3; + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3; def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_W; + [(set VR128:$dst, + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>, + VEX_4V, VEX_W; def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4VOp3; + [(set VR128:$dst, + (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>, + VEX_4VOp3; } let isAsmParserOnly = 1 in { - defm VPSHLW : xop3op<0x95, "vpshlw">; - defm VPSHLQ : xop3op<0x97, "vpshlq">; - defm VPSHLD : xop3op<0x96, 
"vpshld">; - defm VPSHLB : xop3op<0x94, "vpshlb">; - defm VPSHAW : xop3op<0x99, "vpshaw">; - defm VPSHAQ : xop3op<0x9B, "vpshaq">; - defm VPSHAD : xop3op<0x9A, "vpshad">; - defm VPSHAB : xop3op<0x98, "vpshab">; - defm VPROTW : xop3op<0x91, "vprotw">; - defm VPROTQ : xop3op<0x93, "vprotq">; - defm VPROTD : xop3op<0x92, "vprotd">; - defm VPROTB : xop3op<0x90, "vprotb">; + defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>; + defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>; + defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>; + defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>; + defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>; + defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>; + defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>; + defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>; + defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>; + defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>; + defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>; + defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>; } multiclass xop3opimm<bits<8> opc, string OpcodeStr> { - def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i8imm:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX; - def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX; + let neverHasSideEffects = 1 in { + def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; + let mayLoad = 1 in + def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; + } } let isAsmParserOnly = 1 in { @@ -105,139 +138,170 @@ let isAsmParserOnly = 1 in { } // Instruction where second source can be memory, but third must be register -multiclass xop4opm2<bits<8> opc, string OpcodeStr> { +multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM; def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3))]>, VEX_4V, VEX_I8IMM; } let isAsmParserOnly = 1 in { - defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">; - defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">; - defm VPMACSWW : xop4opm2<0x95, "vpmacsww">; - defm VPMACSWD : xop4opm2<0x96, "vpmacswd">; - defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">; - defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">; - defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">; - defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">; - defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">; - defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">; - defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">; - defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">; + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>; + defm 
VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>; + defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>; } // Instruction where second source can be memory, third must be imm8 -multiclass xop4opimm<bits<8> opc, string OpcodeStr> { +multiclass xop4opimm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType VT> { def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V; + [(set VR128:$dst, + (VT (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, VEX_4V; def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V; + [(set VR128:$dst, + (VT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + imm:$src3)))]>, VEX_4V; } let isAsmParserOnly = 1 in { - defm VPCOMW : xop4opimm<0xCD, "vpcomw">; - defm VPCOMUW : xop4opimm<0xED, "vpcomuw">; - defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">; - defm VPCOMUD : xop4opimm<0xEE, "vpcomud">; - defm VPCOMUB : xop4opimm<0xEC, "vpcomub">; - defm VPCOMQ : xop4opimm<0xCF, "vpcomq">; - defm VPCOMD : xop4opimm<0xCE, "vpcomd">; - defm VPCOMB : xop4opimm<0xCC, "vpcomb">; + defm VPCOMB : xop4opimm<0xCC, "vpcomb", X86vpcom, v16i8>; + defm VPCOMW : xop4opimm<0xCD, "vpcomw", X86vpcom, v8i16>; + defm VPCOMD : xop4opimm<0xCE, "vpcomd", X86vpcom, v4i32>; + defm VPCOMQ : xop4opimm<0xCF, "vpcomq", X86vpcom, v2i64>; + defm VPCOMUB : xop4opimm<0xEC, "vpcomub", X86vpcomu, v16i8>; + defm VPCOMUW : xop4opimm<0xED, "vpcomuw", X86vpcomu, v8i16>; + defm VPCOMUD : xop4opimm<0xEE, "vpcomud", X86vpcomu, v4i32>; + defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", X86vpcomu, v2i64>; } // Instruction where either second or third source can be memory -multiclass xop4op<bits<8> opc, string OpcodeStr> { +multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, + VEX_4V, VEX_I8IMM; def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM, XOP_W; + [(set VR128:$dst, + (Int VR128:$src1, VR128:$src2, + (bitconvert (memopv2i64 addr:$src3))))]>, + VEX_4V, VEX_I8IMM, VEX_W, MemOp4; def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR128:$dst, + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + VR128:$src3))]>, + VEX_4V, VEX_I8IMM; } let isAsmParserOnly = 1 in { - 
defm VPPERM : xop4op<0xA3, "vpperm">; - defm VPCMOV : xop4op<0xA2, "vpcmov">; + defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>; + defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>; } -multiclass xop4op256<bits<8> opc, string OpcodeStr> { +multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> { def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>, + VEX_4V, VEX_I8IMM; def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM, XOP_W; + [(set VR256:$dst, + (Int VR256:$src1, VR256:$src2, + (bitconvert (memopv4i64 addr:$src3))))]>, + VEX_4V, VEX_I8IMM, VEX_W, MemOp4; def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, VEX_4V, VEX_I8IMM; + [(set VR256:$dst, + (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)), + VR256:$src3))]>, + VEX_4V, VEX_I8IMM; } let isAsmParserOnly = 1 in { - defm VPCMOV : xop4op256<0xA2, "vpcmov">; + defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>; } -multiclass xop5op<bits<8> opc, string OpcodeStr> { +multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128, + Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> { def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR128:$dst, + (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>; def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, XOP_W; + [(set VR128:$dst, + (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>, + VEX_W, MemOp4; def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR128:$dst, + (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>; def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR256:$dst, + (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>; def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, XOP_W; + [(set VR256:$dst, + (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>, + VEX_W, MemOp4; def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>; + [(set VR256:$dst, + (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>; } -let isAsmParserOnly = 1 in { - defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">; - 
defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">; -} +defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd, + int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>; +defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps, + int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>; + diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 2145a33..0168d12 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -300,7 +300,10 @@ extern "C" { SIZE(X86CompilationCallback_SSE) ); # else - void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + // the following function is called only from this translation unit, + // unless we are under 64bit Windows with MSC, where there is + // no support for inline assembly + static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); _declspec(naked) void X86CompilationCallback(void) { __asm { @@ -571,6 +574,5 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) { return TLSOffset; #else llvm_unreachable("Cannot allocate thread local storage on this arch!"); - return 0; #endif } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 238420c..c76d3cc 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -1,4 +1,4 @@ -//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===// +//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 9232196..a7a5c56 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -154,6 +154,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Ctx), Ctx); break; + case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; @@ -230,7 +231,8 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { /// a short fixed-register form. static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { unsigned ImmOp = Inst.getNumOperands() - 1; - assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() && + assert(Inst.getOperand(0).isReg() && + (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) && ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || Inst.getNumOperands() == 2) && "Unexpected instruction!"); @@ -335,6 +337,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers. 
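+ // (Illustrative note, not in the original patch: a register-mask
+ // operand is a compact summary of the registers a call clobbers; MCInst
+ // has no operand kind that can represent it, so the lowering loop skips
+ // the operand instead of emitting anything for it.)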
+ continue;
}
OutMI.addOperand(MCOp);
@@ -373,6 +378,7 @@ ReSimplify:
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
+ case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -383,14 +389,12 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
- // register inputs modeled as normal uses instead of implicit uses. As such,
- // truncate off all but the first operand (the callee). FIXME: Change isel.
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
+ // inputs modeled as normal uses instead of implicit uses. As such, truncate
+ // off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32:
- case X86::WINCALL64r:
- case X86::WINCALL64pcrel32: {
+ case X86::CALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -412,7 +416,7 @@ ReSimplify:
case X86::TAILJMPd64: {
unsigned Opcode;
switch (OutMI.getOpcode()) {
- default: assert(0 && "Invalid opcode");
+ default: llvm_unreachable("Invalid opcode");
case X86::TAILJMPr: Opcode = X86::JMP32r; break;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 0210072..40df3db 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
new file mode 100644
index 0000000..568dc22
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void X86MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index b0bb313..c747109 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,10 +1,10 @@
-//====- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
-//
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file declares X86-specific per-machine-function information.
@@ -21,6 +21,8 @@ namespace llvm {
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + /// ForceFramePointer - True if the function is required to use of frame /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 4e80432..93e2744 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===// +//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -127,121 +127,13 @@ const TargetRegisterClass * X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned SubIdx) const { - switch (SubIdx) { - default: return 0; - case X86::sub_8bit: - if (B == &X86::GR8RegClass) { - if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) - return A; - } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || - A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || - A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_ABCDRegClass; - else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || - A == &X86::GR32_NOREXRegClass || - A == &X86::GR32_NOSPRegClass) - return &X86::GR32_ABCDRegClass; - else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || - A == &X86::GR16_NOREXRegClass) - return &X86::GR16_ABCDRegClass; - } else if (B == &X86::GR8_NOREXRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_NOREXRegClass; - else if (A == &X86::GR64_ABCDRegClass) - return &X86::GR64_ABCDRegClass; - else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || - A == &X86::GR32_NOSPRegClass) - return &X86::GR32_NOREXRegClass; - else if (A == &X86::GR32_ABCDRegClass) - return &X86::GR32_ABCDRegClass; - else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) - return &X86::GR16_NOREXRegClass; - else if (A == &X86::GR16_ABCDRegClass) - return &X86::GR16_ABCDRegClass; - } - break; - case X86::sub_8bit_hi: - if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass)) - switch (A->getSize()) { - case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass); - case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass); - case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass); - default: return 0; - } - break; - case X86::sub_16bit: - if (B == &X86::GR16RegClass) { - if (A->getSize() == 4 || A->getSize() == 8) - return A; - } else if (B == &X86::GR16_ABCDRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || - A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || - A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_ABCDRegClass; - else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || - A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) - return &X86::GR32_ABCDRegClass; - } else if (B == &X86::GR16_NOREXRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_NOREXRegClass; - else if (A == &X86::GR64_ABCDRegClass) - return &X86::GR64_ABCDRegClass; - else if (A == &X86::GR32RegClass 
|| A == &X86::GR32_NOREXRegClass || - A == &X86::GR32_NOSPRegClass) - return &X86::GR32_NOREXRegClass; - else if (A == &X86::GR32_ABCDRegClass) - return &X86::GR64_ABCDRegClass; - } - break; - case X86::sub_32bit: - if (B == &X86::GR32RegClass) { - if (A->getSize() == 8) - return A; - } else if (B == &X86::GR32_NOSPRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass) - return &X86::GR64_NOSPRegClass; - if (A->getSize() == 8) - return getCommonSubClass(A, &X86::GR64_NOSPRegClass); - } else if (B == &X86::GR32_ABCDRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || - A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || - A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_ABCDRegClass; - } else if (B == &X86::GR32_NOREXRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) - return &X86::GR64_NOREXRegClass; - else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_NOREX_NOSPRegClass; - else if (A == &X86::GR64_ABCDRegClass) - return &X86::GR64_ABCDRegClass; - } else if (B == &X86::GR32_NOREX_NOSPRegClass) { - if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || - A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) - return &X86::GR64_NOREX_NOSPRegClass; - else if (A == &X86::GR64_ABCDRegClass) - return &X86::GR64_ABCDRegClass; - } - break; - case X86::sub_ss: - if (B == &X86::FR32RegClass) - return A; - break; - case X86::sub_sd: - if (B == &X86::FR64RegClass) - return A; - break; - case X86::sub_xmm: - if (B == &X86::VR128RegClass) - return A; - break; + // The sub_8bit sub-register index is more constrained in 32-bit mode. + if (!Is64Bit && SubIdx == X86::sub_8bit) { + A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi); + if (!A) + return 0; } - return 0; + return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); } const TargetRegisterClass* @@ -334,7 +226,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -const unsigned * +const uint16_t * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool callsEHReturn = false; bool ghcCall = false; @@ -345,45 +237,29 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); } - static const unsigned GhcCalleeSavedRegs[] = { - 0 - }; - - static const unsigned CalleeSavedRegs32Bit[] = { - X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 - }; - - static const unsigned CalleeSavedRegs32EHRet[] = { - X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0 - }; - - static const unsigned CalleeSavedRegs64Bit[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - - static const unsigned CalleeSavedRegs64EHRet[] = { - X86::RAX, X86::RDX, X86::RBX, X86::R12, - X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - - static const unsigned CalleeSavedRegsWin64[] = { - X86::RBX, X86::RBP, X86::RDI, X86::RSI, - X86::R12, X86::R13, X86::R14, X86::R15, - X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9, - X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, - X86::XMM14, X86::XMM15, 0 - }; - - if (ghcCall) { - return GhcCalleeSavedRegs; - } else if (Is64Bit) { + if (ghcCall) + return CSR_Ghc_SaveList; + if (Is64Bit) { if (IsWin64) - return CalleeSavedRegsWin64; - else - return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit); - } else { - return (callsEHReturn ? 
CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ return CSR_Win64_SaveList;
+ if (callsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
}
+ if (callsEHReturn)
+ return CSR_32EHRet_SaveList;
+ return CSR_32_SaveList;
+}
+
+const uint32_t*
+X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::GHC)
+ return CSR_Ghc_RegMask;
+ if (!Is64Bit)
+ return CSR_32_RegMask;
+ if (IsWin64)
+ return CSR_Win64_RegMask;
+ return CSR_64_RegMask;
}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -428,16 +304,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (unsigned n = 0; n != 8; ++n) {
// R8, R9, ...
- const unsigned GPR64[] = {
+ static const uint16_t GPR64[] = {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ for (const uint16_t *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
Reserved.set(Reg);
// XMM8, XMM9, ...
assert(X86::XMM15 == X86::XMM8+7);
- for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ for (const uint16_t *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
++AI)
Reserved.set(Reg);
}
@@ -650,12 +526,10 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned X86RegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned X86RegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
namespace llvm {
@@ -837,8 +711,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15;
}
}
-
- return Reg;
}
}
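The X86RegisterInfo.cpp hunks above swap the hand-maintained callee-saved arrays for TableGen-generated CSR_*_SaveList tables and add getCallPreservedMask, which returns a precomputed bit mask of the registers a call preserves. A rough standalone C++ sketch of the two shapes involved, with invented register numbers and a single 32-bit mask word (real LLVM masks use one bit per physical register spread over several words):

#include <cstdint>
#include <cstdio>

// Hypothetical register numbering, for illustration only.
enum Reg : uint16_t { NoReg = 0, RBX, RBP, R12, R13, R14, R15 };

// Shape of a generated save list (cf. CSR_64_SaveList): null-terminated.
static const uint16_t SaveList64[] = { RBX, R12, R13, R14, R15, RBP, NoReg };

// Shape of a generated call-preserved mask (cf. CSR_64_RegMask):
// one bit per register, set if the register survives a call.
static uint32_t makeRegMask(const uint16_t *List) {
  uint32_t Mask = 0;
  for (const uint16_t *I = List; *I != NoReg; ++I)
    Mask |= 1u << *I;
  return Mask;
}

int main() {
  uint32_t Mask = makeRegMask(SaveList64);
  // Frame lowering walks the null-terminated list to emit saves/restores;
  // a register allocator just tests one bit in the mask per call site.
  for (const uint16_t *I = SaveList64; *I != NoReg; ++I)
    std::printf("spill/reload register %u around the function\n", (unsigned)*I);
  std::printf("RBX preserved across calls: %d\n", (int)((Mask >> RBX) & 1u));
  return 0;
}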
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7d39c68..bee0393 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,7 +95,8 @@ public:
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9a7db36..5263a49 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -70,7 +70,7 @@ let Namespace = "X86" in {
def BH : Register<"bh">;
// 16-bit registers
- let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 990962d..857becf 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -1,4 +1,4 @@
-//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
new file mode 100644
index 0000000..d6d0149
--- /dev/null
+++ b/lib/Target/X86/X86Schedule.td
@@ -0,0 +1,262 @@
+//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for X86
+def IIC_DEFAULT : InstrItinClass;
+def IIC_ALU_MEM : InstrItinClass;
+def IIC_ALU_NONMEM : InstrItinClass;
+def IIC_LEA : InstrItinClass;
+def IIC_LEA_16 : InstrItinClass;
+def IIC_MUL8 : InstrItinClass;
+def IIC_MUL16_MEM : InstrItinClass;
+def IIC_MUL16_REG : InstrItinClass;
+def IIC_MUL32_MEM : InstrItinClass;
+def IIC_MUL32_REG : InstrItinClass;
+def IIC_MUL64 : InstrItinClass;
+// imul by al, ax, eax, rax
+def IIC_IMUL8 : InstrItinClass;
+def IIC_IMUL16_MEM : InstrItinClass;
+def IIC_IMUL16_REG : InstrItinClass;
+def IIC_IMUL32_MEM : InstrItinClass;
+def IIC_IMUL32_REG : InstrItinClass;
+def IIC_IMUL64 : InstrItinClass;
+// imul reg by reg|mem
+def IIC_IMUL16_RM : InstrItinClass;
+def IIC_IMUL16_RR : InstrItinClass;
+def IIC_IMUL32_RM : InstrItinClass;
+def IIC_IMUL32_RR : InstrItinClass;
+def IIC_IMUL64_RM : InstrItinClass;
+def IIC_IMUL64_RR : InstrItinClass;
+// imul reg = reg/mem * imm
+def IIC_IMUL16_RMI : InstrItinClass;
+def IIC_IMUL16_RRI : InstrItinClass;
+def IIC_IMUL32_RMI : InstrItinClass;
+def IIC_IMUL32_RRI : InstrItinClass;
+def IIC_IMUL64_RMI : InstrItinClass;
+def IIC_IMUL64_RRI : InstrItinClass;
+// div
+def IIC_DIV8_MEM : InstrItinClass;
+def IIC_DIV8_REG : InstrItinClass;
+def IIC_DIV16 : InstrItinClass;
+def IIC_DIV32 : InstrItinClass;
+def IIC_DIV64 : InstrItinClass;
+// idiv
+def IIC_IDIV8 : InstrItinClass;
+def IIC_IDIV16 : InstrItinClass;
+def IIC_IDIV32 : InstrItinClass;
+def IIC_IDIV64 : InstrItinClass;
+// neg/not/inc/dec
+def IIC_UNARY_REG : InstrItinClass;
+def IIC_UNARY_MEM : InstrItinClass;
+//
add/sub/and/or/xor/adc/sbc/cmp/test +def IIC_BIN_MEM : InstrItinClass; +def IIC_BIN_NONMEM : InstrItinClass; +// shift/rotate +def IIC_SR : InstrItinClass; +// shift double +def IIC_SHD16_REG_IM : InstrItinClass; +def IIC_SHD16_REG_CL : InstrItinClass; +def IIC_SHD16_MEM_IM : InstrItinClass; +def IIC_SHD16_MEM_CL : InstrItinClass; +def IIC_SHD32_REG_IM : InstrItinClass; +def IIC_SHD32_REG_CL : InstrItinClass; +def IIC_SHD32_MEM_IM : InstrItinClass; +def IIC_SHD32_MEM_CL : InstrItinClass; +def IIC_SHD64_REG_IM : InstrItinClass; +def IIC_SHD64_REG_CL : InstrItinClass; +def IIC_SHD64_MEM_IM : InstrItinClass; +def IIC_SHD64_MEM_CL : InstrItinClass; +// cmov +def IIC_CMOV16_RM : InstrItinClass; +def IIC_CMOV16_RR : InstrItinClass; +def IIC_CMOV32_RM : InstrItinClass; +def IIC_CMOV32_RR : InstrItinClass; +def IIC_CMOV64_RM : InstrItinClass; +def IIC_CMOV64_RR : InstrItinClass; +// set +def IIC_SET_R : InstrItinClass; +def IIC_SET_M : InstrItinClass; +// jmp/jcc/jcxz +def IIC_Jcc : InstrItinClass; +def IIC_JCXZ : InstrItinClass; +def IIC_JMP_REL : InstrItinClass; +def IIC_JMP_REG : InstrItinClass; +def IIC_JMP_MEM : InstrItinClass; +def IIC_JMP_FAR_MEM : InstrItinClass; +def IIC_JMP_FAR_PTR : InstrItinClass; +// loop +def IIC_LOOP : InstrItinClass; +def IIC_LOOPE : InstrItinClass; +def IIC_LOOPNE : InstrItinClass; +// call +def IIC_CALL_RI : InstrItinClass; +def IIC_CALL_MEM : InstrItinClass; +def IIC_CALL_FAR_MEM : InstrItinClass; +def IIC_CALL_FAR_PTR : InstrItinClass; +// ret +def IIC_RET : InstrItinClass; +def IIC_RET_IMM : InstrItinClass; +//sign extension movs +def IIC_MOVSX : InstrItinClass; +def IIC_MOVSX_R16_R8 : InstrItinClass; +def IIC_MOVSX_R16_M8 : InstrItinClass; +def IIC_MOVSX_R16_R16 : InstrItinClass; +def IIC_MOVSX_R32_R32 : InstrItinClass; +//zero extension movs +def IIC_MOVZX : InstrItinClass; +def IIC_MOVZX_R16_R8 : InstrItinClass; +def IIC_MOVZX_R16_M8 : InstrItinClass; + +// SSE scalar/parallel binary operations +def IIC_SSE_ALU_F32S_RR : InstrItinClass; +def IIC_SSE_ALU_F32S_RM : InstrItinClass; +def IIC_SSE_ALU_F64S_RR : InstrItinClass; +def IIC_SSE_ALU_F64S_RM : InstrItinClass; +def IIC_SSE_MUL_F32S_RR : InstrItinClass; +def IIC_SSE_MUL_F32S_RM : InstrItinClass; +def IIC_SSE_MUL_F64S_RR : InstrItinClass; +def IIC_SSE_MUL_F64S_RM : InstrItinClass; +def IIC_SSE_DIV_F32S_RR : InstrItinClass; +def IIC_SSE_DIV_F32S_RM : InstrItinClass; +def IIC_SSE_DIV_F64S_RR : InstrItinClass; +def IIC_SSE_DIV_F64S_RM : InstrItinClass; +def IIC_SSE_ALU_F32P_RR : InstrItinClass; +def IIC_SSE_ALU_F32P_RM : InstrItinClass; +def IIC_SSE_ALU_F64P_RR : InstrItinClass; +def IIC_SSE_ALU_F64P_RM : InstrItinClass; +def IIC_SSE_MUL_F32P_RR : InstrItinClass; +def IIC_SSE_MUL_F32P_RM : InstrItinClass; +def IIC_SSE_MUL_F64P_RR : InstrItinClass; +def IIC_SSE_MUL_F64P_RM : InstrItinClass; +def IIC_SSE_DIV_F32P_RR : InstrItinClass; +def IIC_SSE_DIV_F32P_RM : InstrItinClass; +def IIC_SSE_DIV_F64P_RR : InstrItinClass; +def IIC_SSE_DIV_F64P_RM : InstrItinClass; + +def IIC_SSE_COMIS_RR : InstrItinClass; +def IIC_SSE_COMIS_RM : InstrItinClass; + +def IIC_SSE_HADDSUB_RR : InstrItinClass; +def IIC_SSE_HADDSUB_RM : InstrItinClass; + +def IIC_SSE_BIT_P_RR : InstrItinClass; +def IIC_SSE_BIT_P_RM : InstrItinClass; + +def IIC_SSE_INTALU_P_RR : InstrItinClass; +def IIC_SSE_INTALU_P_RM : InstrItinClass; +def IIC_SSE_INTALUQ_P_RR : InstrItinClass; +def IIC_SSE_INTALUQ_P_RM : InstrItinClass; + +def IIC_SSE_INTMUL_P_RR : InstrItinClass; +def IIC_SSE_INTMUL_P_RM : InstrItinClass; + +def IIC_SSE_INTSH_P_RR : InstrItinClass; 
+def IIC_SSE_INTSH_P_RM : InstrItinClass; +def IIC_SSE_INTSH_P_RI : InstrItinClass; + +def IIC_SSE_CMPP_RR : InstrItinClass; +def IIC_SSE_CMPP_RM : InstrItinClass; + +def IIC_SSE_SHUFP : InstrItinClass; +def IIC_SSE_PSHUF : InstrItinClass; + +def IIC_SSE_UNPCK : InstrItinClass; + +def IIC_SSE_MOVMSK : InstrItinClass; +def IIC_SSE_MASKMOV : InstrItinClass; + +def IIC_SSE_PEXTRW : InstrItinClass; +def IIC_SSE_PINSRW : InstrItinClass; + +def IIC_SSE_PABS_RR : InstrItinClass; +def IIC_SSE_PABS_RM : InstrItinClass; + +def IIC_SSE_SQRTP_RR : InstrItinClass; +def IIC_SSE_SQRTP_RM : InstrItinClass; +def IIC_SSE_SQRTS_RR : InstrItinClass; +def IIC_SSE_SQRTS_RM : InstrItinClass; + +def IIC_SSE_RCPP_RR : InstrItinClass; +def IIC_SSE_RCPP_RM : InstrItinClass; +def IIC_SSE_RCPS_RR : InstrItinClass; +def IIC_SSE_RCPS_RM : InstrItinClass; + +def IIC_SSE_MOV_S_RR : InstrItinClass; +def IIC_SSE_MOV_S_RM : InstrItinClass; +def IIC_SSE_MOV_S_MR : InstrItinClass; + +def IIC_SSE_MOVA_P_RR : InstrItinClass; +def IIC_SSE_MOVA_P_RM : InstrItinClass; +def IIC_SSE_MOVA_P_MR : InstrItinClass; + +def IIC_SSE_MOVU_P_RR : InstrItinClass; +def IIC_SSE_MOVU_P_RM : InstrItinClass; +def IIC_SSE_MOVU_P_MR : InstrItinClass; + +def IIC_SSE_MOVDQ : InstrItinClass; +def IIC_SSE_MOVD_ToGP : InstrItinClass; +def IIC_SSE_MOVQ_RR : InstrItinClass; + +def IIC_SSE_MOV_LH : InstrItinClass; + +def IIC_SSE_LDDQU : InstrItinClass; + +def IIC_SSE_MOVNT : InstrItinClass; + +def IIC_SSE_PHADDSUBD_RR : InstrItinClass; +def IIC_SSE_PHADDSUBD_RM : InstrItinClass; +def IIC_SSE_PHADDSUBSW_RR : InstrItinClass; +def IIC_SSE_PHADDSUBSW_RM : InstrItinClass; +def IIC_SSE_PHADDSUBW_RR : InstrItinClass; +def IIC_SSE_PHADDSUBW_RM : InstrItinClass; +def IIC_SSE_PSHUFB_RR : InstrItinClass; +def IIC_SSE_PSHUFB_RM : InstrItinClass; +def IIC_SSE_PSIGN_RR : InstrItinClass; +def IIC_SSE_PSIGN_RM : InstrItinClass; + +def IIC_SSE_PMADD : InstrItinClass; +def IIC_SSE_PMULHRSW : InstrItinClass; +def IIC_SSE_PALIGNR : InstrItinClass; +def IIC_SSE_MWAIT : InstrItinClass; +def IIC_SSE_MONITOR : InstrItinClass; + +def IIC_SSE_PREFETCH : InstrItinClass; +def IIC_SSE_PAUSE : InstrItinClass; +def IIC_SSE_LFENCE : InstrItinClass; +def IIC_SSE_MFENCE : InstrItinClass; +def IIC_SSE_SFENCE : InstrItinClass; +def IIC_SSE_LDMXCSR : InstrItinClass; +def IIC_SSE_STMXCSR : InstrItinClass; + +def IIC_SSE_CVT_PD_RR : InstrItinClass; +def IIC_SSE_CVT_PD_RM : InstrItinClass; +def IIC_SSE_CVT_PS_RR : InstrItinClass; +def IIC_SSE_CVT_PS_RM : InstrItinClass; +def IIC_SSE_CVT_PI2PS_RR : InstrItinClass; +def IIC_SSE_CVT_PI2PS_RM : InstrItinClass; +def IIC_SSE_CVT_Scalar_RR : InstrItinClass; +def IIC_SSE_CVT_Scalar_RM : InstrItinClass; +def IIC_SSE_CVT_SS2SI32_RM : InstrItinClass; +def IIC_SSE_CVT_SS2SI32_RR : InstrItinClass; +def IIC_SSE_CVT_SS2SI64_RM : InstrItinClass; +def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass; +def IIC_SSE_CVT_SD2SI_RM : InstrItinClass; +def IIC_SSE_CVT_SD2SI_RR : InstrItinClass; + + +//===----------------------------------------------------------------------===// +// Processor instruction itineraries. 
+ +def GenericItineraries : ProcessorItineraries<[], [], []>; + +include "X86ScheduleAtom.td" + + + diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td new file mode 100644 index 0000000..e8cf72a --- /dev/null +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -0,0 +1,294 @@ +//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the Intel Atom (Bonnell) +// processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from the "Intel 64 and IA32 Architectures +// Optimization Reference Manual", Chapter 13, Section 4. +// Functional Units +// Port 0 +def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store + // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide +def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA + // SIMD/FP: SIMD ALU, FP Adder + +def AtomItineraries : ProcessorItineraries< + [ Port0, Port1 ], + [], [ + // P0 only + // InstrItinData<class, [InstrStage<N, [P0]>] >, + // P0 or P1 + // InstrItinData<class, [InstrStage<N, [P0, P1]>] >, + // P0 and P1 + // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >, + // + // Default is 1 cycle, port0 or port1 + InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >, + InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >, + // mul + InstrItinData<IIC_MUL8, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_MUL64, [InstrStage<12, [Port0, Port1]>] >, + // imul by al, ax, eax, rax + InstrItinData<IIC_IMUL8, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL64, [InstrStage<12, [Port0, Port1]>] >, + // imul reg by reg|mem + InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >, + // imul reg = reg/mem * imm + InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >, + // idiv + InstrItinData<IIC_IDIV8, [InstrStage<62, 
[Port0, Port1]>] >, + InstrItinData<IIC_IDIV16, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_IDIV32, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_IDIV64, [InstrStage<130, [Port0, Port1]>] >, + // div + InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >, + InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >, + InstrItinData<IIC_DIV16, [InstrStage<50, [Port0, Port1]>] >, + InstrItinData<IIC_DIV32, [InstrStage<50, [Port0, Port1]>] >, + InstrItinData<IIC_DIV64, [InstrStage<130, [Port0, Port1]>] >, + // neg/not/inc/dec + InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >, + // add/sub/and/or/xor/adc/sbc/cmp/test + InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >, + // shift/rotate + InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >, + // shift double + InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >, + // cmov + InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >, + // set + InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >, + // jcc + InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >, + // jcxz/jecxz/jrcxz + InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >, + // jmp rel + InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >, + // jmp indirect + InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >, + InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >, + // jmp far + InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >, + InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >, + // loop/loope/loopne + InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >, + InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >, + // call - all but reg/imm + InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>, + InstrStage<1, [Port1]>] >, + InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >, + InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >, + InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >, + //ret + InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >, + InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >, + //sign extension movs + InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] 
>, + InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [Port0, Port1]>] >, + //zero extension movs + InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >, + InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >, + + // SSE binary operations + // arithmetic fp scalar + InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>, + InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>, + InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >, + InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >, + InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >, + + // arithmetic fp parallel + InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>, + InstrStage<5, [Port1]>] >, + InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >, + + // bitwise parallel + InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >, + + // arithmetic int parallel + InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >, + + // multiply int parallel + InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >, + + // shift parallel + InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >, + 
InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >, + InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >, + + InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >, + InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >, + InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >, + + InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >, + InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >, + + InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >, + InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >, + InstrItinData<IIC_SSE_MWAIT, 
[InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >
+]>;
+
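Each InstrItinData row in the Atom table above binds one itinerary class to a list of InstrStage entries: a cycle count plus the ports the instruction ties up (Port0 only, Port1 only, or either). A toy C++ model of how a scheduler might read such a table, assuming latency is simply the sum of stage cycles and ignoring the third InstrStage operand (the next-stage overlap used by entries such as IIC_CALL_RI above):

#include <cstdio>
#include <vector>

// Toy stand-in for InstrStage<N, [Ports]>: N cycles on one of the units.
struct Stage {
  unsigned Cycles;
  std::vector<int> Units; // {0} = Port0, {1} = Port1, {0, 1} = either port
};

struct ItinRow {
  const char *Class;
  std::vector<Stage> Stages;
};

// A few rows mirroring the Atom itineraries above.
static const ItinRow Rows[] = {
  { "IIC_ALU_NONMEM", { { 1, {0, 1} } } },  // 1 cycle, Port0 or Port1
  { "IIC_MUL64",      { { 12, {0, 1} } } }, // long multiply blocks both ports
  { "IIC_SSE_MUL_F32S_RR", { { 4, {0} } } } // FP multiply is Port0 only
};

// In this simplified model, the latency of a class is the sum of its stage
// cycle counts; LLVM's InstrItineraryData::getStageLatency does a similar walk.
static unsigned latency(const ItinRow &R) {
  unsigned L = 0;
  for (const Stage &S : R.Stages)
    L += S.Cycles;
  return L;
}

int main() {
  for (const ItinRow &R : Rows)
    std::printf("%s: %u cycle(s)\n", R.Class, latency(R));
  return 0;
}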
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6406bce..9a04e35 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -65,7 +65,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
DAG, dl);
return CallResult.second;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 6e092c7..3eb9441 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -198,7 +198,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
+ //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
@@ -246,6 +246,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
IsBTMemSlow = true;
ToggleFeature(X86::FeatureSlowBTMem);
}
+
// If it's Nehalem, unaligned memory access is fast.
// FIXME: Nehalem is family 6. Also include Westmere and later processors?
if (Family == 15 && Model == 26) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
+ // Set processor type. Currently only Atom is detected.
+ if (Family == 6 && Model == 28) {
+ X86ProcFamily = IntelAtom;
+ ToggleFeature(X86::FeatureLeaForSP);
+ }
+
unsigned MaxExtLevel;
X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -266,15 +273,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasLZCNT = true;
ToggleFeature(X86::FeatureLZCNT);
}
- if (IsAMD && ((ECX >> 6) & 0x1)) {
- HasSSE4A = true;
- ToggleFeature(X86::FeatureSSE4A);
- }
- if (IsAMD && ((ECX >> 16) & 0x1)) {
- HasFMA4 = true;
- ToggleFeature(X86::FeatureFMA4);
- HasXOP = true;
- ToggleFeature(X86::FeatureXOP);
+ if (IsAMD) {
+ if ((ECX >> 6) & 0x1) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if ((ECX >> 11) & 0x1) {
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
+ }
+ if ((ECX >> 16) & 0x1) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
}
}
}
@@ -291,7 +302,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
// FIXME: AVX2 codegen support is not ready.
//if ((EBX >> 5) & 0x1) {
- // HasAVX2 = true;
+ // X86SSELevel = AVX2;
// ToggleFeature(X86::FeatureAVX2);
//}
if ((EBX >> 8) & 0x1) {
@@ -306,6 +317,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
, PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
@@ -313,8 +325,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasX86_64(false)
, HasPOPCNT(false)
, HasSSE4A(false)
- , HasAVX(false)
- , HasAVX2(false)
, HasAES(false)
, HasCLMUL(false)
, HasFMA3(false)
@@ -331,16 +341,19 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, IsUAMemFast(false)
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
- , stackAlignment(8)
+ , UseLeaForSP(false)
+ , PostRAScheduler(false)
+ , stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
, In64BitMode(is64Bit) {
// Determine default and user specified characteristics
+ std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
- std::string CPUName = CPU;
if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
CPUName = sys::getHostCPUName();
#else
CPUName = "generic";
@@ -360,6 +373,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
// If feature string is not empty, parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
} else {
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures(); @@ -368,7 +388,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, HasX86_64 = true; ToggleFeature(X86::Feature64Bit); HasCMov = true; ToggleFeature(X86::FeatureCMOV); - if (!HasAVX && X86SSELevel < SSE2) { + if (X86SSELevel < SSE2) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE1); ToggleFeature(X86::FeatureSSE2); @@ -376,14 +396,16 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } + if (X86ProcFamily == IntelAtom) { + PostRAScheduler = true; + InstrItins = getInstrItineraryForCPU(CPUName); + } + // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. if (In64BitMode) ToggleFeature(X86::Mode64Bit); - if (HasAVX) - X86SSELevel = NoMMXSSE; - DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); @@ -398,3 +420,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, isTargetSolaris() || In64BitMode) stackAlignment = 16; } + +bool X86Subtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index ccb9be0..a36d0d8 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -1,4 +1,4 @@ -//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====// +//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -42,13 +42,20 @@ enum Style { class X86Subtarget : public X86GenSubtargetInfo { protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 + NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 }; enum X863DNowEnum { NoThreeDNow, ThreeDNow, ThreeDNowA }; + enum X86ProcFamilyEnum { + Others, IntelAtom + }; + + /// X86ProcFamily - X86 processor family: Intel Atom, and others + X86ProcFamilyEnum X86ProcFamily; + /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -75,12 +82,6 @@ protected: /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; - /// HasAVX - Target has AVX instructions - bool HasAVX; - - /// HasAVX2 - Target has AVX2 instructions - bool HasAVX2; - /// HasAES - Target has AES instructions bool HasAES; @@ -131,6 +132,13 @@ protected: /// this is true for most x86-64 chips, but not the first AMD chips. bool HasCmpxchg16b; + /// UseLeaForSP - True if the LEA instruction should be used for adjusting + /// the stack pointer. This is an optimization for Intel Atom processors. + bool UseLeaForSP; + + /// PostRAScheduler - True if using post-register-allocation scheduler. + bool PostRAScheduler; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -141,6 +149,9 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + + /// Instruction itineraries for scheduling + InstrItineraryData InstrItins; private: /// In64BitMode - True if compiling for 64-bit, false for 32-bit. 
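The X86Subtarget.h hunk above folds AVX and AVX2 into the ordered X86SSEEnum instead of keeping separate HasAVX/HasAVX2 flags, and the accessor hunk that follows turns every feature query into an ordinal comparison. A minimal sketch of why the ordering alone is enough (abbreviated stand-in types, not the actual LLVM classes):

#include <cassert>

// Each level implies every earlier one, so ordering encodes implication.
enum SSELevel { NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2 };

struct SubtargetModel {
  SSELevel Level;
  bool hasSSE2() const { return Level >= SSE2; }
  bool hasSSE42() const { return Level >= SSE42; }
  bool hasAVX() const { return Level >= AVX; }
  bool hasAVX2() const { return Level >= AVX2; }
};

int main() {
  SubtargetModel ST = { AVX };
  // With separate boolean flags, an AVX target needed helpers such as the
  // removed hasXMMInt() to answer "SSE2 or AVX?"; with one ordered level,
  // hasSSE2() is already true on any AVX target.
  assert(ST.hasSSE2() && ST.hasAVX() && !ST.hasAVX2());
  return 0;
}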
@@ -185,18 +196,12 @@ public: bool hasSSSE3() const { return X86SSELevel >= SSSE3; } bool hasSSE41() const { return X86SSELevel >= SSE41; } bool hasSSE42() const { return X86SSELevel >= SSE42; } + bool hasAVX() const { return X86SSELevel >= AVX; } + bool hasAVX2() const { return X86SSELevel >= AVX2; } bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } - bool hasAVX() const { return HasAVX; } - bool hasAVX2() const { return HasAVX2; } - bool hasXMM() const { return hasSSE1() || hasAVX(); } - bool hasXMMInt() const { return hasSSE2() || hasAVX(); } - bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } - bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } - bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } - bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } @@ -213,6 +218,9 @@ public: bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } + bool useLeaForSP() const { return UseLeaForSP; } + + bool isAtom() const { return X86ProcFamily == IntelAtom; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -226,38 +234,28 @@ public: // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". - bool isTargetELF() const { - return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); - } + bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NativeClient; } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } - bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } - bool isTargetCygMing() const { - return isTargetMingw() || isTargetCygwin(); - } - - /// isTargetCOFF - Return true if this is any COFF/Windows target variant. - bool isTargetCOFF() const { - return isTargetMingw() || isTargetCygwin() || isTargetWindows(); - } + bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); } + bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } + bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); } bool isTargetWin64() const { // FIXME: x86_64-cygwin has not been released yet. - return In64BitMode && (isTargetCygMing() || isTargetWindows()); - } - - bool isTargetEnvMacho() const { - return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO); + return In64BitMode && TargetTriple.isOSWindows(); } bool isTargetWin32() const { + // FIXME: Cygwin is included for isTargetWin64 -- should it be included + // here too? return !In64BitMode && (isTargetMingw() || isTargetWindows()); } @@ -303,6 +301,15 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// enablePostRAScheduler - run for Atom optimization. 
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItineraryData - Return the instruction itineraries based on the
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
};
} // End llvm namespace
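enablePostRAScheduler, declared above and implemented at the end of the X86Subtarget.cpp hunk earlier, lets the subtarget itself decide whether the post-register-allocation scheduler runs (here, only when the Atom family was detected and optimization is enabled). A simplified sketch of the hook pattern, with invented class and enum names:

#include <cstdio>

enum class OptLevel { None, Less, Default, Aggressive };

// Stand-in for the TargetSubtargetInfo hook that the pass pipeline queries.
struct SubtargetBase {
  virtual ~SubtargetBase() {}
  virtual bool enablePostRAScheduler(OptLevel OL) const { return false; }
};

struct AtomLikeSubtarget : SubtargetBase {
  bool PostRAScheduler;
  AtomLikeSubtarget() : PostRAScheduler(true) {} // set on Atom detection
  bool enablePostRAScheduler(OptLevel OL) const override {
    // Mirrors the X86 logic above: opt in only at the default level or higher.
    return PostRAScheduler && OL >= OptLevel::Default;
  }
};

int main() {
  AtomLikeSubtarget ST;
  // Generic code asks the subtarget instead of hard-coding per-CPU checks.
  std::printf("post-RA scheduling at -O2: %d\n",
              (int)ST.enablePostRAScheduler(OptLevel::Default));
  std::printf("post-RA scheduling at -O0: %d\n",
              (int)ST.enablePostRAScheduler(OptLevel::None));
  return 0;
}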
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 126042e..f4b7a62 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -28,6 +28,7 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
}
+void X86_32TargetMachine::anchor() { }
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -50,6 +51,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
JITInfo(*this) {
}
+void X86_64TargetMachine::anchor() { }
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -76,7 +78,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
- ELFWriterInfo(is64Bit, true) {
+ ELFWriterInfo(is64Bit, true),
+ InstrItins(Subtarget.getInstrItineraryData()){
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -99,10 +102,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
- this->Options.FloatABIType = FloatABI::Hard;
-
- if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
+ this->Options.FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
@@ -117,35 +117,61 @@ UseVZeroUpper("x86-use-vzeroupper",
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool X86TargetMachine::addInstSelector(PassManagerBase &PM) {
+namespace {
+/// X86 Code Generator Pass Configuration Options.
+class X86PassConfig : public TargetPassConfig {
+public:
+ X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ X86TargetMachine &getX86TargetMachine() const {
+ return getTM<X86TargetMachine>();
+ }
+
+ const X86Subtarget &getX86Subtarget() const {
+ return *getX86TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new X86PassConfig(this, PM);
+}
+
+bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createX86ISelDag(*this, getOptLevel()));
+ PM.add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
- if (!Subtarget.is64Bit())
+ if (!getX86Subtarget().is64Bit())
PM.add(createGlobalBaseRegPass());
return false;
}
-bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM) {
+bool X86PassConfig::addPreRegAlloc() {
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
-bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM) {
+bool X86PassConfig::addPostRegAlloc() {
PM.add(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM) {
+bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
- if (getOptLevel() != CodeGenOpt::None && Subtarget.hasXMMInt()) {
+ if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
- if (Subtarget.hasAVX() && UseVZeroUpper) {
+ if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
PM.add(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 3ac1769..143caba 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -27,17 +27,18 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
-
+
class formatted_raw_ostream;
class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
- X86Subtarget Subtarget;
- X86FrameLowering FrameLowering;
- X86ELFWriterInfo ELFWriterInfo;
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
public:
- X86TargetMachine(const Target &T, StringRef TT,
+ X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
@@ -56,7 +57,7 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
llvm_unreachable("getTargetLowering not implemented");
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
@@ -65,12 +66,13 @@ public:
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
// Set up the pass pipeline.
- virtual bool addInstSelector(PassManagerBase &PM);
- virtual bool addPreRegAlloc(PassManagerBase &PM);
- virtual bool addPostRegAlloc(PassManagerBase &PM);
- virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
};
@@ -78,6 +80,7 @@ public:
/// X86_32TargetMachine - X86 32-bit target machine.
/// class X86_32TargetMachine : public X86TargetMachine { + virtual void anchor(); const TargetData DataLayout; // Calculates type size & alignment X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; @@ -92,7 +95,7 @@ public: virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } - virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } virtual const X86InstrInfo *getInstrInfo() const { @@ -106,6 +109,7 @@ public: /// X86_64TargetMachine - X86 64-bit target machine. /// class X86_64TargetMachine : public X86TargetMachine { + virtual void anchor(); const TargetData DataLayout; // Calculates type size & alignment X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; @@ -120,7 +124,7 @@ public: virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } - virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } virtual const X86InstrInfo *getInstrInfo() const { diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 991f322..c0d2a9c 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -1,4 +1,4 @@ -//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===// +//===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index d7adf27..ceb7a4a 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===// +//===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index f8c30eb..2fd78a7 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -145,7 +145,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { // to insert any VZEROUPPER instructions. This is constant-time, so it is // cheap in the common case of no ymm use. 
   bool YMMUsed = false;
-  TargetRegisterClass *RC = X86::VR256RegisterClass;
+  const TargetRegisterClass *RC = X86::VR256RegisterClass;
   for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
        i != e; i++) {
     if (MRI.isPhysRegUsed(*i)) {
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index de4abfc..0d59572 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(XCoreCodeGen
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp
   XCoreISelLowering.cpp
+  XCoreMachineFunctionInfo.cpp
   XCoreRegisterInfo.cpp
   XCoreSubtarget.cpp
   XCoreTargetMachine.cpp
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 42ab1b3..64f1a8e 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -10,6 +10,8 @@
 #include "XCoreMCAsmInfo.h"
 using namespace llvm;

+void XCoreMCAsmInfo::anchor() { }
+
 XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
   SupportsDebugInformation = true;
   Data16bitsDirective = "\t.short\t";
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 8403922..24e170a 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,6 +21,7 @@ namespace llvm {
   class Target;

   class XCoreMCAsmInfo : public MCAsmInfo {
+    virtual void anchor();
   public:
     explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
   };
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 7d5fcce..bbfdd43 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,6 +17,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"

 #define GET_INSTRINFO_MC_DESC
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 3840189..04a1dd5 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -1,4 +1,4 @@
-//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
+// This is the top level entry point for the XCore target.
 //
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 5007d04..4d8ef74 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -93,8 +93,6 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineModuleInfo *MMI = &MF.getMMI();
-  const XCoreRegisterInfo *RegInfo =
-    static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
   const XCoreInstrInfo &TII =
     *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
   XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -119,7 +117,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
     // FIXME could emit multiple instructions.
     report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
   }
-  bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+  bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF);

   // Do we need to allocate space on the stack?
   if (FrameSize) {
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index c591e93..4c51aa5 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index c5c668e..c2d2a5d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -36,7 +36,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
 using namespace llvm;

 const char *XCoreTargetLowering::
@@ -188,7 +187,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
   default:
     llvm_unreachable("unimplemented operand");
-    return SDValue();
   }
 }
@@ -200,7 +198,6 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to custom expand this!");
-    return;
   case ISD::ADD:
   case ISD::SUB:
     Results.push_back(ExpandADDSUB(N, DAG));
@@ -276,9 +273,8 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
       GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
   }
-  if (! GVar) {
+  if (!GVar) {
     llvm_unreachable("Thread local object not a GlobalVariable?");
-    return SDValue();
   }
   Type *Ty = cast<PointerType>(GV->getType())->getElementType();
   if (!Ty->isSized() || isZeroLengthArray(Ty)) {
@@ -491,8 +487,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {

   std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(Chain, IntPtrTy, false, false,
-                    false, false, 0, CallingConv::C, false,
-                    /*isReturnValueUsed=*/true,
+                    false, false, 0, CallingConv::C, /*isTailCall=*/false,
+                    /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
                     DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
                     Args, DAG, DL);

@@ -553,8 +549,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const

   std::pair<SDValue, SDValue> CallResult =
         LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
-                    false, false, 0, CallingConv::C, false,
-                    /*isReturnValueUsed=*/true,
+                    false, false, 0, CallingConv::C, /*isTailCall=*/false,
+                    /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
                     DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
                     Args, DAG, dl);

@@ -758,7 +754,7 @@ SDValue XCoreTargetLowering::
 LowerVAARG(SDValue Op, SelectionDAG &DAG) const
 {
   llvm_unreachable("unimplemented");
-  // FIX Arguments passed by reference need a extra dereference.
+  // FIXME Arguments passed by reference need an extra dereference.
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
@@ -879,7 +875,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
 SDValue
 XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                CallingConv::ID CallConv, bool isVarArg,
-                               bool &isTailCall,
+                               bool doesNotRet, bool &isTailCall,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
                                const SmallVectorImpl<SDValue> &OutVals,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1603,8 +1599,6 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
     // reg + reg<<2
     return AM.Scale == 4 && AM.BaseOffs == 0;
   }
-
-  return false;
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d6c5b32..f5a6822 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -175,9 +175,8 @@ namespace llvm {
                            SmallVectorImpl<SDValue> &InVals) const;

     virtual SDValue
-      LowerCall(SDValue Chain, SDValue Callee,
-                CallingConv::ID CallConv, bool isVarArg,
-                bool &isTailCall,
+      LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+                bool isVarArg, bool doesNotRet, bool &isTailCall,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                 const SmallVectorImpl<SDValue> &OutVals,
                 const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 8002c99..1963a70 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index a0946a1..f930623 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index d354802..e47d212 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 4d2e93b..b25a08d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
new file mode 100644
index 0000000..7ca0672
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void XCoreFunctionInfo::anchor() { }
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index a575a0f..f869fcf 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//===-- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -26,7 +26,7 @@ class Function;
 /// XCoreFunctionInfo - This class is derived from MachineFunction private
 /// XCore target-specific information for each MachineFunction.
 class XCoreFunctionInfo : public MachineFunctionInfo {
-private:
+  virtual void anchor();
   bool UsesLR;
   int LRSpillSlot;
   int FPSpillSlot;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1b78b37..8730282 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -73,9 +73,9 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
     MF.getFunction()->needsUnwindTableEntry();
 }

-const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
                                                                          const {
-  static const unsigned CalleeSavedRegs[] = {
+  static const uint16_t CalleeSavedRegs[] = {
     XCore::R4, XCore::R5, XCore::R6, XCore::R7,
     XCore::R8, XCore::R9, XCore::R10, XCore::LR,
     0
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5c28f39..ab6ce56 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -44,7 +44,7 @@ public:

   /// Code Generation virtual methods...

-  const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+  const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;

   BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index c354230..9edfda1 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index b4e9927..8cfb770 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -21,6 +21,8 @@

 using namespace llvm;

+void XCoreSubtarget::anchor() { }
+
 XCoreSubtarget::XCoreSubtarget(const std::string &TT,
                                const std::string &CPU, const std::string &FS)
   : XCoreGenSubtargetInfo(TT, CPU, FS)
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 7b29fa2..8d0f254 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -25,6 +25,7 @@ namespace llvm {
 class StringRef;

 class XCoreSubtarget : public XCoreGenSubtargetInfo {
+  virtual void anchor();

 public:
   /// This constructor initializes the data members to match that
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 7e1e035..f65297e 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "XCore.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;

@@ -34,8 +35,27 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
     TSInfo(*this) {
 }

-bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
-  PM.add(createXCoreISelDag(*this, getOptLevel()));
+namespace {
+/// XCore Code Generator Pass Configuration Options.
+class XCorePassConfig : public TargetPassConfig {
+public:
+  XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  XCoreTargetMachine &getXCoreTargetMachine() const {
+    return getTM<XCoreTargetMachine>();
+  }
+
+  virtual bool addInstSelector();
+};
+} // namespace
+
+TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new XCorePassConfig(this, PM);
+}
+
+bool XCorePassConfig::addInstSelector() {
+  PM.add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
   return false;
 }
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 0159b1e..2c174f4 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -56,7 +56,7 @@ public:
   virtual const TargetData *getTargetData() const { return &DataLayout; }

   // Pass Pipeline Configuration
-  virtual bool addInstSelector(PassManagerBase &PM);
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
 };

 } // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 7424c78..27875e7 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
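
Note on the recurring change above: the X86 and XCore hunks both replace the per-TargetMachine pass hooks (addInstSelector, addPreRegAlloc, addPostRegAlloc, addPreEmitPass) with a TargetPassConfig subclass returned from a single createPassConfig factory. The sketch below shows the minimal shape of that pattern for a hypothetical "Foo" backend; FooTargetMachine, FooPassConfig, and createFooISelDag are placeholder names and are not part of this patch, while the TargetPassConfig details (the getTM<> helper and the inherited PM member) follow the XCorePassConfig code shown above.

  #include "llvm/CodeGen/Passes.h"  // TargetPassConfig
  using namespace llvm;

  namespace {
  // Pass pipeline configuration for the hypothetical Foo backend.
  class FooPassConfig : public TargetPassConfig {
  public:
    FooPassConfig(FooTargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

    // getTM<> is the TargetPassConfig helper for recovering the concrete
    // TargetMachine, replacing direct access to subtarget members.
    FooTargetMachine &getFooTargetMachine() const {
      return getTM<FooTargetMachine>();
    }

    virtual bool addInstSelector() {
      // Passes are added through the PM member inherited from
      // TargetPassConfig; returning false means -print-machineinstrs
      // should not print after this pass.
      PM.add(createFooISelDag(getFooTargetMachine(), getOptLevel()));
      return false;
    }
  };
  } // namespace

  // The TargetMachine now overrides one factory hook instead of many.
  TargetPassConfig *FooTargetMachine::createPassConfig(PassManagerBase &PM) {
    return new FooPassConfig(this, PM);
  }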
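
Note on the many "virtual void anchor();" additions (XCoreMCAsmInfo, XCoreSubtarget, XCoreFunctionInfo, X86_32TargetMachine, and others): this is LLVM's out-of-line virtual method idiom. Giving a class one virtual method that is defined in exactly one .cpp file pins the vtable to that translation unit, so it is not re-emitted as a weak symbol everywhere the header is included. A minimal sketch of the idiom, using a hypothetical class name (only the classes listed in the hunks above actually gain anchors in this patch):

  // Widget.h -- declare one virtual method with no inline body.
  class Widget {
    virtual void anchor();  // the anchor; private and never called
  public:
    virtual ~Widget() {}
  };

  // Widget.cpp -- the lone out-of-line definition; the compiler emits
  // the vtable (and RTTI) only in this translation unit.
  void Widget::anchor() { }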