diff options
| author | Stephen Hines <srhines@google.com> | 2013-08-07 15:07:10 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2013-08-07 15:07:10 -0700 |
| commit | fab2daa4a1127ecb217abe2b07c1769122b6fee1 (patch) | |
| tree | 268ebfd1963fd98ba412e76819afdf95a7d4267b /lib/Target/ARM | |
| parent | 8197ac1c1a0a91baa70c4dea8cb488f254ef974c (diff) | |
| parent | 10251753b6897adcd22cc981c0cc42f348c109de (diff) | |
| download | external_llvm-fab2daa4a1127ecb217abe2b07c1769122b6fee1.zip external_llvm-fab2daa4a1127ecb217abe2b07c1769122b6fee1.tar.gz external_llvm-fab2daa4a1127ecb217abe2b07c1769122b6fee1.tar.bz2 | |
Merge commit '10251753b6897adcd22cc981c0cc42f348c109de' into merge-20130807
Conflicts:
lib/Archive/ArchiveReader.cpp
lib/Support/Unix/PathV2.inc
Change-Id: I29d8c1e321a4a380b6013f00bac6a8e4b593cc4e
Diffstat (limited to 'lib/Target/ARM')
39 files changed, 2346 insertions, 1157 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index f0d4dbe..e8c2f7c 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -615,7 +615,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { SmallVector<unsigned, 8> Defs = getReadDPRs(MI); bool Modified = false; - for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end(); I != E; ++I) { // Follow the def-use chain for this DPR through COPYs, and also through // PHIs (which are essentially multi-way COPYs). It is because of PHIs that @@ -630,7 +630,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { elideCopiesAndPHIs(Def, DefSrcs); - for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(), EE = DefSrcs.end(); II != EE; ++II) { MachineInstr *MI = *II; @@ -655,7 +655,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { if (NewReg != 0) { Modified = true; - for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { DEBUG(dbgs() << "Replacing operand " << **I << " with " diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 1bc9d6b..e5da3a5 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -45,6 +45,9 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", "Enable VFP4 instructions", [FeatureVFP3, FeatureFP16]>; +def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", @@ -138,6 +141,9 @@ def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", [HasV6T2Ops, FeaturePerfMon]>; +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -173,7 +179,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureFP16, + [FeatureT2XtPk, FeatureFP16, FeatureVFP4, FeatureAvoidPartialCPSR, FeatureTrustZone]>; def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", @@ -291,6 +297,9 @@ def : ProcessorModel<"swift", SwiftModel, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; +// V8 Processors +def : ProcNoItin<"cortex-a53", [HasV8Ops]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 13ec208..13a22b1 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -213,77 +213,67 @@ namespace { } // end of anonymous namespace -MachineLocation ARMAsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) - AsmPrinter::EmitDwarfRegOp(MLoc); - else { - unsigned Reg = MLoc.getReg(); - if (Reg >= ARM::S0 && Reg <= ARM::S31) { - assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); - // S registers are described as bit-pieces of a register - // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) - // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - - unsigned SReg = Reg - ARM::S0; - bool odd = SReg & 0x1; - unsigned Rx = 256 + (SReg >> 1); - - OutStreamer.AddComment("DW_OP_regx for S register"); - EmitInt8(dwarf::DW_OP_regx); - - OutStreamer.AddComment(Twine(SReg)); - EmitULEB128(Rx); - - if (odd) { - OutStreamer.AddComment("DW_OP_bit_piece 32 32"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(32); - } else { - OutStreamer.AddComment("DW_OP_bit_piece 32 0"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(0); - } - } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { - assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); - // Q registers Q0-Q15 are described by composing two D registers together. - // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) - // DW_OP_piece(8) - - unsigned QReg = Reg - ARM::Q0; - unsigned D1 = 256 + 2 * QReg; - unsigned D2 = D1 + 1; - - OutStreamer.AddComment("DW_OP_regx for Q register: D1"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D1); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); - - OutStreamer.AddComment("DW_OP_regx for Q register: D2"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D2); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) { + AsmPrinter::EmitDwarfRegOp(MLoc, Indirect); + return; + } + assert(MLoc.isReg() && !Indirect && + "This doesn't support offset/indirection - implement it if needed"); + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) + // DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); } } @@ -474,8 +464,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. - - O << "{" << ARMInstPrinter::getRegisterName(RegBegin); + O << "{"; + if (ARM::GPRPairRegClass.contains(RegBegin)) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); + O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); + } + O << ARMInstPrinter::getRegisterName(RegBegin); // FIXME: The register allocator not only may not have given us the // registers in sequence, but may not be in ascending registers. This @@ -501,6 +497,20 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return true; unsigned Flags = FlagsOP.getImm(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + unsigned RC; + InlineAsm::hasRegClassConstraint(Flags, RC); + if (RC == ARM::GPRPairRegClassID) { + if (NumVals != 1) + return true; + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? + ARM::gsub_0 : ARM::gsub_1); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } if (NumVals != 2) return true; unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1; @@ -759,16 +769,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (CPUString == "generic") { - // For a generic CPU, we assume a standard v7a architecture in Subtarget. - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } else if (Subtarget->hasV7Ops()) { + } else if (Subtarget->hasV8Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v8); + else if (Subtarget->hasV7Ops()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::AllowThumb32); @@ -782,6 +785,8 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); else if (Subtarget->hasV4TOps()) AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4); if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of @@ -796,8 +801,14 @@ void ARMAsmPrinter::emitAttributes() { emitFPU = false; } - /* VFPv4 + .fpu */ - if (Subtarget->hasVFP4()) { + /* V8FP + .fpu */ + if (Subtarget->hasV8FP()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowV8FPA); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "v8fp"); + /* VFPv4 + .fpu */ + } else if (Subtarget->hasVFP4()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv4A); if (emitFPU) @@ -821,8 +832,12 @@ void ARMAsmPrinter::emitAttributes() { /* TODO: ARMBuildAttrs::Allowed is not completely accurate, * since NEON can have 1 (allowed) or 2 (MAC operations) */ if (Subtarget->hasNEON()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::Allowed); + if (Subtarget->hasV8Ops()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowedNeonV8); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); } // Signal various FP modes. @@ -1092,23 +1107,6 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } -void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps-2, OS); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); @@ -1272,15 +1270,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass"); - case ARM::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } + case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing"); case ARM::LEApcrel: case ARM::tLEApcrel: case ARM::t2LEApcrel: { diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index c945e4f..de72e06 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -97,13 +97,9 @@ private: const MachineInstr *MI); public: - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - virtual MachineLocation - getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; - /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; + virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const + LLVM_OVERRIDE; virtual unsigned getISAEncoding() LLVM_OVERRIDE { // ARM/Darwin adds ISA to the DWARF info for each function. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ad14475..977d936 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -272,104 +272,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. + TBB = 0; + FBB = 0; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - // Get the last instruction in the block. - MachineInstr *LastInst = I; - unsigned LastOpc = LastInst->getOpcode(); + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator()) { - // Check if it's an indirect branch first, this should return 'unanalyzable' - // even if it's predicated. - if (isIndirectBranchOpcode(LastOpc)) - return true; - - if (!isUnpredicatedTerminator(I)) - return false; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. + CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; + + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. + Cond.clear(); + FBB = 0; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = llvm::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. - return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } @@ -745,6 +731,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); @@ -1213,16 +1202,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ return true; } -MachineInstr* -ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -3684,8 +3663,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, - /*FindMin=*/false); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 4ca3d7b..96f8637 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -125,12 +125,6 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7c03055..58c06e3 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -58,30 +58,44 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); } - if (ghcCall) { - return CSR_GHC_SaveList; - } - else { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; - } + if (ghcCall) + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_NoRegs_SaveList; + else + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; } const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return CSR_NoRegs_RegMask; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getNoPreservedMask() const { - return CSR_NoRegs_RegMask; +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { + // This should return a register mask that is the same as that returned by + // getCallPreservedMask but that additionally preserves the register used for + // the first i32 argument (which must also be the register used to return a + // single i32 return value) + // + // In case that the calling convention does not use the same register for + // both or otherwise does not want to enable this optimization, the function + // should return NULL + if (CC == CallingConv::GHC) + // This is academic becase all GHC calls are (supposed to be) tail calls + return NULL; + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -309,7 +323,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - if (!MF.getTarget().Options.RealignStack) + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -702,12 +716,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } #endif // NDEBUG - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } + assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code"); // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 03b3682..cdaad05 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -94,9 +94,18 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the + /// case that 'returned' is on an i32 first argument if the calling convention + /// is one that can (partially) model this attribute with a preserved mask + /// (i.e. it is a calling convention that uses the same register for the first + /// i32 argument and an i32 return value) + /// + /// Should return NULL in the case that the calling convention does not have + /// this property + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index 11bd6a4..f614dca 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -89,7 +89,8 @@ namespace ARMBuildAttrs { v7 = 10, // e.g. Cortex A8, Cortex M3 v6_M = 11, // e.g. Cortex M1 v6S_M = 12, // v6_M with the System extensions - v7E_M = 13 // v7_M with DSP extensions + v7E_M = 13, // v7_M with DSP extensions + v8 = 14 // v8, AArch32 }; enum CPUArchProfile { // (=7), uleb128 @@ -105,6 +106,7 @@ namespace ARMBuildAttrs { //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128 Not_Allowed = 0, Allowed = 1, + AllowedNeonV8 = 3, // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) @@ -112,6 +114,8 @@ namespace ARMBuildAttrs { AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 + AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted + AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8ff666e..89c5223 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -208,9 +208,3 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; - -// GHC set of callee saved regs is empty as all those regs are -// used for passing STG regs around -// add is a workaround for not being able to compile empty list: -// def CSR_GHC : CalleeSavedRegs<()>; -def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 4a157d7..96eb764 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -171,6 +171,8 @@ namespace { const { return 0; } unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } + unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { if (IsThumb) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a4de941..ed054aa 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -42,7 +42,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -630,6 +629,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); + // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + bool IsThreadLocal = GVar && GVar->isThreadLocal(); + if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support @@ -816,22 +820,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { switch (Opcode) { default: break; - case Instruction::BitCast: { + case Instruction::BitCast: // Look through bitcasts. return ARMComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { + case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } - case Instruction::PtrToInt: { + case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } case Instruction::GetElementPtr: { Address SavedAddr = Addr; int TmpOffset = Addr.Offset; @@ -2184,10 +2185,14 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { } unsigned ARMFastISel::getLibcallReg(const Twine &Name) { + // Manually compute the global's type to avoid building it when unnecessary. + Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); + EVT LCREVT = TLI.getValueType(GVTy); + if (!LCREVT.isSimple()) return 0; + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - EVT LCREVT = TLI.getValueType(GV->getType()); - if (!LCREVT.isSimple()) return 0; + assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2629,34 +2634,46 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, }; // Table governing the instruction(s) to be emitted. - static const struct { - // First entry for each of the following is sext, second zext. - uint16_t Opc[2]; - uint8_t Imm[2]; // All instructions have either a shift or a mask. - uint8_t hasS[2]; // Some instructions have an S bit, always set it to 0. - } OpcTbl[2][2][3] = { + static const struct InstructionTable { + uint32_t Opc : 16; + uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0. + uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi. + uint32_t Imm : 8; // All instructions have either a shift or a mask. + } IT[2][2][3][2] = { { // Two instructions (first is left shift, second is in this table). - { // ARM - /* 1 */ { { ARM::ASRi, ARM::LSRi }, { 31, 31 }, { 1, 1 } }, - /* 8 */ { { ARM::ASRi, ARM::LSRi }, { 24, 24 }, { 1, 1 } }, - /* 16 */ { { ARM::ASRi, ARM::LSRi }, { 16, 16 }, { 1, 1 } } + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 }, + /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } }, + /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 }, + /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } }, + /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 }, + /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } } }, - { // Thumb - /* 1 */ { { ARM::tASRri, ARM::tLSRri }, { 31, 31 }, { 0, 0 } }, - /* 8 */ { { ARM::tASRri, ARM::tLSRri }, { 24, 24 }, { 0, 0 } }, - /* 16 */ { { ARM::tASRri, ARM::tLSRri }, { 16, 16 }, { 0, 0 } } + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 }, + /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } }, + /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 }, + /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } }, + /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 }, + /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } } } }, { // Single instruction. - { // ARM - /* 1 */ { { ARM::KILL, ARM::ANDri }, { 0, 1 }, { 0, 1 } }, - /* 8 */ { { ARM::SXTB, ARM::ANDri }, { 0, 255 }, { 0, 1 } }, - /* 16 */ { { ARM::SXTH, ARM::UXTH }, { 0, 0 }, { 0, 0 } } + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } } }, - { // Thumb - /* 1 */ { { ARM::KILL, ARM::t2ANDri }, { 0, 1 }, { 0, 1 } }, - /* 8 */ { { ARM::t2SXTB, ARM::t2ANDri }, { 0, 255 }, { 0, 1 } }, - /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH }, { 0, 0 }, { 0, 0 } } + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } } } } }; @@ -2671,20 +2688,28 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, "other sizes unimplemented"); bool hasV6Ops = Subtarget->hasV6Ops(); - unsigned Bitness = countTrailingZeros(SrcBits) >> 1; // {1,8,16}=>{0,1,2} + unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2} assert((Bitness < 3) && "sanity-check table bounds"); bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt]; const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr]; - unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt]; + const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt]; + unsigned Opc = ITP->Opc; assert(ARM::KILL != Opc && "Invalid table entry"); - unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt]; - unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt]; + unsigned hasS = ITP->hasS; + ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift; + assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) && + "only MOVsi has shift operand addressing mode"); + unsigned Imm = ITP->Imm; // 16-bit Thumb instructions always set CPSR (unless they're in an IT block). bool setsCPSR = &ARM::tGPRRegClass == RC; - unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi; + unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi; unsigned ResultReg; + // MOVsi encodes shift and immediate in shift operand addressing mode. + // The following condition has the same value when emitting two + // instruction sequences: both are shifts. + bool ImmIsSO = (Shift != ARM_AM::no_shift); // Either one or two instructions are emitted. // They're always of the form: @@ -2697,13 +2722,16 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2; for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) { ResultReg = createResultReg(RC); - unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc; + bool isLsl = (0 == Instr) && !isSingleInstr; + unsigned Opcode = isLsl ? LSLOpc : Opc; + ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift; + unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm; bool isKill = 1 == Instr; MachineInstrBuilder MIB = BuildMI( *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); if (setsCPSR) MIB.addReg(ARM::CPSR, RegState::Define); - AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm)); + AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); if (hasS) AddDefaultCC(MIB); // Second instruction consumes the first's result. @@ -3025,8 +3053,6 @@ bool ARMFastISel::FastLowerArguments() { Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { - if (I->use_empty()) - continue; unsigned SrcReg = GPRArgRegs[Idx]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. @@ -3044,13 +3070,23 @@ bool ARMFastISel::FastLowerArguments() { namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { - // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); - // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. + bool UseFastISel = false; + UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); + UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + + if (UseFastISel) { + // iOS always has a FP for backtracking, force other targets + // to keep their FP when doing FastISel. The emitted code is + // currently superior, and in cases like test-suite's lencod + // FastISel isn't quite correct when FP is eliminated. + TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); + } return 0; } } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 962368d..4ca3af6 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -61,7 +61,6 @@ enum AddrMode2Type { class ARMDAGToDAGISel : public SelectionDAGISel { ARMBaseTargetMachine &TM; - const ARMBaseInstrInfo *TII; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -71,7 +70,6 @@ public: explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())), Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } @@ -177,6 +175,7 @@ public: SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); inline bool is_so_imm(unsigned Imm) const { return ARM_AM::getSOImmVal(Imm) != -1; @@ -423,7 +422,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() && !Subtarget->isSwift()) return true; @@ -434,6 +433,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) return true; @@ -533,7 +535,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -557,7 +560,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -703,7 +707,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -724,7 +729,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -901,7 +907,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -915,7 +922,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -960,7 +968,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -978,7 +987,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1202,7 +1212,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1219,7 +1230,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1267,7 +1279,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1297,7 +1310,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,7 +1340,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1403,6 +1418,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return true; } +bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, + SDValue &OffImm) { + // This *must* succeed since it's used for the irreplacable ldrex and strex + // instructions. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + + if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) + return true; + + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!RHS) + return true; + + uint32_t RHSC = (int)RHS->getZExtValue(); + if (RHSC > 1020 || RHSC % 4 != 0) + return true; + + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + } + + OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); + return true; +} + //===--------------------------------------------------------------------===// /// getAL - Returns a ARMCC::AL immediate node. @@ -2587,7 +2630,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + getTargetLowering()->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb1Only()) { @@ -2617,7 +2660,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; @@ -3449,24 +3493,20 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool Changed = false; unsigned NumOps = N->getNumOperands(); - ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( - N->getOperand(InlineAsm::Op_AsmString)); - StringRef AsmString = StringRef(S->getSymbol()); - // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs // respectively. Since there is no constraint to explicitly specify a - // reg pair, we search %H operand inside the asm string. If it is found, the - // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. - if (AsmString.find(":H}") == StringRef::npos) - return NULL; + // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, + // the 64-bit data may be referred by H, Q, R modifiers, so we still pack + // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getOperand(NumOps-1); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SmallVector<bool, 8> OpChanged; // Glue node will be appended late. - for(unsigned i = 0; i < NumOps -1; ++i) { + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { SDValue op = N->getOperand(i); AsmNodeOperands.push_back(op); @@ -3480,17 +3520,38 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && Kind != InlineAsm::Kind_RegDefEarlyClobber) continue; - unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); unsigned RC; bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); - if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) + || NumRegs != 2) continue; - assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); SDValue V0 = N->getOperand(i+1); SDValue V1 = N->getOperand(i+2); unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); @@ -3551,6 +3612,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ Changed = true; if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); // Replace the current flag. @@ -3563,7 +3625,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ } } - AsmNodeOperands.push_back(Glue); + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); if (!Changed) return NULL; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ec0e9c2..caec11e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -74,7 +74,7 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs, + const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, TM, locs, C) { assert(((PC == Call) || (PC == Prologue)) && @@ -693,10 +693,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } + + // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + // Register based DivRem for AEABI (RTABI 4.2) + if (Subtarget->isTargetAEABI()) { + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); + + setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + } else { + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + } setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -717,8 +743,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } @@ -1068,6 +1092,19 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + + case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG"; + case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG"; + case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG"; + case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG"; + case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG"; + case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG"; + case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG"; + case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG"; + case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG"; + case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG"; + case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG"; + case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG"; } } @@ -1332,7 +1369,7 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector<SDValue, 8> &MemOpChains, + SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, @@ -1360,9 +1397,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; - SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -1711,10 +1748,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const uint32_t *Mask; const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); - if (isThisReturn) - // For 'this' returns, use the R0-preserving mask + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(CallConv); - else + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; + Mask = ARI->getCallPreservedMask(CallConv); + } + } else Mask = ARI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); @@ -2550,8 +2594,18 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } + ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); + AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); + unsigned Domain = ARM_MB::ISH; + if (Subtarget->isSwift() && Ord == Release) { + // Swift happens to implement ISHST barriers in a way that's compatible with + // Release semantics but weaker than ISH so we'd be fools not to use + // it. Beware: other processors probably don't! + Domain = ARM_MB::ISHST; + } + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, @@ -2717,7 +2771,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, lastRegToSaveIndex = REnd - ARM::R0; } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + (GPRArgRegs, array_lengthof(GPRArgRegs)); lastRegToSaveIndex = 4; } @@ -4620,7 +4674,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); - if (isOnlyLowElement) + // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. + // Keep going if we are hitting this case. + if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); @@ -4719,6 +4775,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. + if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); + } + return Vec; + } + return SDValue(); } @@ -5830,6 +5904,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + case ISD::SDIVREM: + case ISD::UDIVREM: return LowerDivRem(Op, DAG); } } @@ -7948,8 +8024,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && - AddcNode->getValueType(1) == MVT::Glue && - "Expect ADDC with two result values: i32, glue"); + "Expect ADDC with two result values. First: i32"); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && @@ -8328,22 +8407,29 @@ static SDValue PerformORCombine(SDNode *N, unsigned SplatBitSize; bool HasAnyUndefs; + APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - APInt SplatBits0; + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection simpler. - EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } } } @@ -8753,6 +8839,98 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, return DAG.getNode(ISD::BITCAST, dl, VT, BV); } +/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. +static SDValue +PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. + // At that time, we may have inserted bitcasts from integer to float. + // If these bitcasts have survived DAGCombine, change the lowering of this + // BUILD_VECTOR in something more vector friendly, i.e., that does not + // force to use floating point types. + + // Make sure we can change the type of the vector. + // This is possible iff: + // 1. The vector is only used in a bitcast to a integer type. I.e., + // 1.1. Vector is used only once. + // 1.2. Use is a bit convert to an integer type. + // 2. The size of its operands are 32-bits (64-bits are not legal). + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // Check 1.1. and 2. + if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) + return SDValue(); + + // By construction, the input type must be float. + assert(EltVT == MVT::f32 && "Unexpected type!"); + + // Check 1.2. + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() != ISD::BITCAST || + Use->getValueType(0).isFloatingPoint()) + return SDValue(); + + // Check profitability. + // Model is, if more than half of the relevant operands are bitcast from + // i32, turn the build_vector into a sequence of insert_vector_elt. + // Relevant operands are everything that is not statically + // (i.e., at compile time) bitcasted. + unsigned NumOfBitCastedElts = 0; + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumOfRelevantElts = NumElts; + for (unsigned Idx = 0; Idx < NumElts; ++Idx) { + SDValue Elt = N->getOperand(Idx); + if (Elt->getOpcode() == ISD::BITCAST) { + // Assume only bit cast to i32 will go away. + if (Elt->getOperand(0).getValueType() == MVT::i32) + ++NumOfBitCastedElts; + } else if (Elt.getOpcode() == ISD::UNDEF || isa<ConstantSDNode>(Elt)) + // Constants are statically casted, thus do not count them as + // relevant operands. + --NumOfRelevantElts; + } + + // Check if more than half of the elements require a non-free bitcast. + if (NumOfBitCastedElts <= NumOfRelevantElts / 2) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + // Create the new vector type. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + // Check if the type is legal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VecVT)) + return SDValue(); + + // Combine: + // ARMISD::BUILD_VECTOR E1, E2, ..., EN. + // => BITCAST INSERT_VECTOR_ELT + // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), + // (BITCAST EN), N. + SDValue Vec = DAG.getUNDEF(VecVT); + SDLoc dl(N); + for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { + SDValue V = N->getOperand(Idx); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (V.getOpcode() == ISD::BITCAST && + V->getOperand(0).getValueType() == MVT::i32) + // Fold obvious case. + V = V.getOperand(0); + else { + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(V.getNode()); + } + SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); + } + Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + return Vec; +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -9131,12 +9309,27 @@ static SDValue PerformVCVTCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from f32 to i32. We can handle + // smaller integers by generating an extra truncate, but larger ones would + // be lossy. + return SDValue(); + } + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - N->getValueType(0), - DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, - DAG.getConstant(Log2_64(C), MVT::i32)); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), + NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); + + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); + + return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -9167,12 +9360,28 @@ static SDValue PerformVDIVCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from i32 to f32. We can handle + // smaller integers by generating an extra extend, but larger ones would + // be lossy. + return SDValue(); + } + + SDValue ConvInput = Op.getOperand(0); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + ConvInput); + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), - Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); + ConvInput, DAG.getConstant(Log2_64(C), MVT::i32)); } /// Getvshiftimm - Check if this is a valid build_vector for the immediate @@ -9658,6 +9867,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return CombineBaseUpdate(N, DCI); + case ARMISD::BUILD_VECTOR: + return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { @@ -9782,6 +9993,21 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + + if (!isTypeLegal(EVT::getEVT(Ty1))) + return false; + + assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); + + // Assuming the caller doesn't have a zeroext or signext return parameter, + // truncation all the way down to i1 is valid. + return true; +} + + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -10181,9 +10407,19 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + unsigned BitWidth = KnownOne.getBitWidth(); + KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; + case ARMISD::ADDC: + case ARMISD::ADDE: + case ARMISD::SUBC: + case ARMISD::SUBE: + // These nodes' second result is a boolean + if (Op.getResNo() == 0) + break; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); @@ -10297,7 +10533,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10506,6 +10742,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + unsigned Opcode = Op->getOpcode(); + assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && + "Invalid opcode for Div/Rem lowering"); + bool isSigned = (Opcode == ISD::SDIVREM); + EVT VT = Op->getValueType(0); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + + RTLIB::Libcall LC; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy()); + + Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + + SDLoc dl(Op); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, + 0, getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10591,6 +10875,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldrex: { + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_strex: { + PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + } case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2b65019..44c769f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -298,6 +298,9 @@ namespace llvm { using TargetLowering::isZExtFree; virtual bool isZExtFree(SDValue Val, EVT VT2) const; + virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -349,7 +352,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -417,7 +420,7 @@ namespace llvm { RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector<SDValue, 8> &MemOpChains, + SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, @@ -457,6 +460,18 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; + SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + /// + /// ARM supports both fused and unfused multiply-add operations; we already + /// lower a pair of fmul and fadd to the latter so it's not clear that there + /// would be a gain or that the gain would be worthwhile enough to risk + /// correctness bugs. + virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index bd9a212..1349476 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1230,8 +1230,9 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin, : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>; // Move to/from coprocessor instructions -class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern> - : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> { +class T2Cop<bits<4> opc, dag oops, dag iops, string opcstr, string asm, + list<dag> pattern> + : T2I <oops, iops, NoItinerary, opcstr, asm, pattern>, Requires<[IsThumb2]> { let Inst{31-28} = opc; } @@ -1521,6 +1522,32 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Double precision, unary, not-predicated +class ADuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPUnaryFrm, itin, asm, "", pattern> { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Double precision, binary class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, @@ -1547,7 +1574,36 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{4} = op4; } -// Single precision, unary +// FP, binary, not predicated +class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, VFPBinaryFrm, itin, + asm, "", pattern> +{ + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // double precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + +// Single precision, unary, predicated class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -1571,6 +1627,33 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Single precision, unary, non-predicated +class ASuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPUnaryFrm, itin, asm, "", pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Single precision unary, if no NEON. Same as ASuI except not available if // NEON is enabled. class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, @@ -1606,6 +1689,35 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{4} = op4; } +// Single precision, binary, not predicated +class ASbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPBinaryFrm, itin, asm, "", pattern> +{ + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // Single precision binary, if no NEON. Same as ASbI except not available if // NEON is enabled. class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, @@ -1718,6 +1830,21 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, let DecoderNamespace = "NEON"; } +// Same as NeonI except it is not predicated +class NeonInp<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string dt, string asm, string cstr, + list<dag> pattern> + : InstARM<am, 4, im, f, NeonDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, ".", dt, "\t", asm); + let Pattern = pattern; + list<Predicate> Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; + + let Inst{31-28} = 0b1111; +} + class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> @@ -1837,6 +1964,35 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{5} = Vm{4}; } +// Same as N2V but not predicated. +class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + dag oops, dag iops, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vm", "", pattern> { + bits<5> Vd; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = 0b00111; + let Inst{21-20} = 0b11; + let Inst{19-18} = 0b10; + let Inst{17-16} = op17_16; + let Inst{11} = 0; + let Inst{10-8} = op10_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = 0; + + let DecoderNamespace = "NEON"; +} + // Same as N2V except it doesn't have a datatype suffix. class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -1918,6 +2074,32 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops,Format f, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr, + Dt, "$Vd, $Vn, $Vm", "", pattern> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = op27_23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 8062111..8cdb853 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -106,13 +106,14 @@ namespace { if (TM->getRelocationModel() != Reloc::PIC_) return false; - LLVMContext* Context = &MF.getFunction()->getContext(); - GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, - "_GLOBAL_OFFSET_TABLE_"); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); - unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType()); + LLVMContext *Context = &MF.getFunction()->getContext(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + unsigned PCAdj = TM->getSubtarget<ARMSubtarget>().isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( + *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); + + unsigned Align = TM->getDataLayout() + ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index da815d5..c243402 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -194,6 +194,8 @@ def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; +def HasV8 : Predicate<"Subtarget->hasV8Ops()">, + AssemblerPredicate<"HasV8Ops", "armv8">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -201,6 +203,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; +def HasV8FP : Predicate<"Subtarget->hasV8FP()">, + AssemblerPredicate<"FeatureV8FP", "V8FP">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, @@ -258,7 +262,9 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; -def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " +def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast &&" + " Subtarget->hasVFP4()) || " "Subtarget->isTargetDarwin()">; // VGETLNi32 is microcoded on Swift - prefer VMOV. @@ -275,8 +281,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"TLI->isLittleEndian()">; -def IsBE : Predicate<"TLI->isBigEndian()">; +def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; +def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -456,7 +462,7 @@ def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand<i32> { let EncoderMethod = "getAdrLabelOpValue"; let ParserMatchClass = AdrLabelAsmOperand; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } def neon_vcvt_imm32 : Operand<i32> { @@ -1007,11 +1013,6 @@ def p_imm : Operand<i32> { let DecoderMethod = "DecodeCoprocessor"; } -def pf_imm : Operand<i32> { - let PrintMethod = "printPImmediate"; - let ParserMatchClass = CoprocNumAsmOperand; -} - def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1735,12 +1736,13 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", // The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. -def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "bkpt", "\t$val", []>, Requires<[IsARM]> { +def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { bits<16> val; let Inst{3-0} = val{3-0}; let Inst{19-8} = val{15-4}; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } @@ -2293,7 +2295,6 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{19-16} = addr{16-13}; let Inst{11-0} = addr{11-0}; let DecoderMethod = "DecodeLDRPreImm"; - let AsmMatchConverter = "cvtLdWriteBackRegAddrModeImm12"; } def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2306,7 +2307,6 @@ multiclass AI2_ldridx<bit isByte, string opc, let Inst{11-0} = addr{11-0}; let Inst{4} = 0; let DecoderMethod = "DecodeLDRPreReg"; - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode2"; } def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2364,7 +2364,6 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } def _POST : AI3ldstidx<op, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), @@ -2400,7 +2399,6 @@ def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; - let AsmMatchConverter = "cvtLdrdPre"; } def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), (ins addr_offset_none:$addr, am3offset:$offset), @@ -2503,7 +2501,6 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb), (ins addr_offset_none:$addr, postidx_reg:$Rm), @@ -2515,7 +2512,6 @@ multiclass AI3ldrT<bits<4> op, string opc> { let Inst{11-8} = 0; let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackReg"; let DecoderMethod = "DecodeLDR"; } } @@ -2553,7 +2549,6 @@ multiclass AI2_stridx<bit isByte, string opc, let Inst{23} = addr{12}; // U (add = ('U' == 1)) let Inst{19-16} = addr{16-13}; // Rn let Inst{11-0} = addr{11-0}; // imm12 - let AsmMatchConverter = "cvtStWriteBackRegAddrModeImm12"; let DecoderMethod = "DecodeSTRPreImm"; } @@ -2567,7 +2562,6 @@ multiclass AI2_stridx<bit isByte, string opc, let Inst{19-16} = addr{16-13}; // Rn let Inst{11-0} = addr{11-0}; let Inst{4} = 0; // Inst{4} = 0 - let AsmMatchConverter = "cvtStWriteBackRegAddrMode2"; let DecoderMethod = "DecodeSTRPreReg"; } def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), @@ -2676,7 +2670,6 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtStWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } @@ -2710,7 +2703,6 @@ def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; - let AsmMatchConverter = "cvtStrdPre"; } def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), @@ -2817,7 +2809,6 @@ multiclass AI3strT<bits<4> op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackImm"; } def r : AI3ldstidxT<op, 0, (outs GPR:$base_wb), (ins GPR:$Rt, addr_offset_none:$addr, postidx_reg:$Rm), @@ -2828,7 +2819,6 @@ multiclass AI3strT<bits<4> op, string opc> { let Inst{22} = 0; let Inst{11-8} = 0; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackReg"; } } @@ -4011,8 +4001,13 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (srl GPRnopc:$src2, imm16:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (srl GPRnopc:$src2, imm16_31:$sh)), + (sra GPRnopc:$src2, imm16_31:$sh)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), @@ -4378,14 +4373,44 @@ let usesCustomInserter = 1 in { [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; } +def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + +def strex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8; +}]>; + +def strex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; +}]>; + +def strex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32; +}]>; + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, - "ldrexb", "\t$Rt, $addr", []>; + "ldrexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrexh", "\t$Rt, $addr", []>; + NoItinerary, "ldrexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrex", "\t$Rt, $addr", []>; + NoItinerary, "ldrex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldrexd", "\t$Rt, $addr", []> { @@ -4395,11 +4420,14 @@ def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>; def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_2 GPR:$Rt, addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), @@ -4409,11 +4437,21 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd), } -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } +def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (LDREXB addr_offset_none:$addr)>; +def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (LDREXH addr_offset_none:$addr)>; +def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STREXH GPR:$Rt, addr_offset_none:$addr)>; + // SWP/SWPB are deprecated in V6/V7. let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), @@ -4447,7 +4485,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, @@ -5187,10 +5225,10 @@ def : MnemonicAlias<"rfeed", "rfeib">; def : MnemonicAlias<"rfe", "rfeia">; // SRS aliases -def : MnemonicAlias<"srsfa", "srsda">; -def : MnemonicAlias<"srsea", "srsdb">; -def : MnemonicAlias<"srsfd", "srsia">; -def : MnemonicAlias<"srsed", "srsib">; +def : MnemonicAlias<"srsfa", "srsib">; +def : MnemonicAlias<"srsea", "srsia">; +def : MnemonicAlias<"srsfd", "srsdb">; +def : MnemonicAlias<"srsed", "srsda">; def : MnemonicAlias<"srs", "srsia">; // QSAX == QSUBADDX diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9d1a8ea..af4f4d1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -656,7 +656,6 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, @@ -664,7 +663,6 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QWB<bits<4> op7_4, string Dt> { @@ -675,7 +673,6 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -683,7 +680,6 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -713,7 +709,6 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -721,7 +716,6 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -754,7 +748,6 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -762,7 +755,6 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -811,7 +803,6 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, @@ -819,7 +810,6 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), @@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { @@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), @@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1609,7 +1593,6 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), @@ -1618,7 +1601,6 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST1QWB<bits<4> op7_4, string Dt> { @@ -1629,7 +1611,6 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), @@ -1638,7 +1619,6 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1669,7 +1649,6 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), @@ -1678,7 +1657,6 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1714,7 +1692,6 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1723,7 +1700,6 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -1773,7 +1749,6 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, @@ -1781,7 +1756,6 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST2QWB<bits<4> op7_4, string Dt> { @@ -1792,7 +1766,6 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1801,7 +1774,6 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; } } @@ -2379,6 +2351,21 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; +// Same as above, but not predicated. +class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; + +class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + // Narrow 2-register operations. class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2541,6 +2528,16 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, @@ -2552,6 +2549,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, @@ -2584,6 +2582,16 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; + class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -4659,6 +4667,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4673,6 +4693,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add @@ -5386,6 +5418,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", @@ -5658,6 +5710,34 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"), + (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -6698,12 +6778,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1fff41d..e7218c6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -69,11 +69,6 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; -// ADR instruction labels. -def t_adrlabel : Operand<i32> { - let EncoderMethod = "getThumbAdrLabelOpValue"; -} - // Scaled 4 immediate. def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } def t_imm0_1020s4 : Operand<i32> { @@ -97,12 +92,34 @@ def t_imm0_508s4_neg : Operand<i32> { // Define Thumb specific addressing modes. +// unsigned 8-bit, 2-scaled memory offset +class OperandUnsignedOffset_b8s2 : AsmOperandClass { + let Name = "UnsignedOffset_b8s2"; + let PredicateMethod = "isUnsignedOffset<8, 2>"; +} + +def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; + +// thumb style PC relative operand. signed, 8 bits magnitude, +// two bits shift. can be represented as either [pc, #imm], #imm, +// or relocatable expression... +def ThumbMemPC : AsmOperandClass { + let Name = "ThumbMemPC"; +} + let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; let DecoderMethod = "DecodeThumbBROperand"; } +// ADR instruction labels. +def t_adrlabel : Operand<i32> { + let EncoderMethod = "getThumbAdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand<2>"; + let ParserMatchClass = UnsignedOffset_b8s2; +} + def t_bcctarget : Operand<i32> { let EncoderMethod = "getThumbBCCTargetOpValue"; let DecoderMethod = "DecodeThumbBCCTargetOperand"; @@ -122,6 +139,15 @@ def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; let DecoderMethod = "DecodeThumbBLXOffset"; } + +// t_addrmode_pc := <label> => pc + imm8 * 4 +// +def t_addrmode_pc : Operand<i32> { + let EncoderMethod = "getAddrModePCOpValue"; + let DecoderMethod = "DecodeThumbAddrModePC"; + let PrintMethod = "printThumbLdrLabelOperand"; + let ParserMatchClass = ThumbMemPC; +} } // t_addrmode_rr := reg + reg @@ -218,14 +244,6 @@ def t_addrmode_sp : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t_addrmode_pc := <label> => pc + imm8 * 4 -// -def t_addrmode_pc : Operand<i32> { - let EncoderMethod = "getAddrModePCOpValue"; - let DecoderMethod = "DecodeThumbAddrModePC"; - let PrintMethod = "printThumbLdrLabelOperand"; -} - //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // @@ -505,6 +523,7 @@ let isBranch = 1, isTerminator = 1 in let Inst{7-0} = target; } + // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. @@ -629,11 +648,9 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// Load tconstpool -// FIXME: Use ldr.n to work around a darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", ".n\t$Rt, $addr", + "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}> { // A6.2 & A8.6.59 @@ -643,17 +660,8 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } -// FIXME: Remove this entry when the above ldr.n workaround is fixed. -// For assembly/disassembly use only. -def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", []>, - T1Encoding<{0,1,0,0,1,?}> { - // A6.2 & A8.6.59 - bits<3> Rt; - bits<8> addr; - let Inst{10-8} = Rt; - let Inst{7-0} = addr; -} +def : tInstAlias<"ldr${p}.n $Rt, $addr", + (tLDRpci tGPR:$Rt, t_addrmode_pc:$addr, pred:$p), 0>; // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ff21bf7..84086a5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -173,14 +173,13 @@ def t2ldr_pcrel_imm12 : Operand<i32> { // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } - // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} def t2addrmode_posimm8 : Operand<i32> { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemPosImm8OffsetAsmOperand; @@ -191,7 +190,7 @@ def t2addrmode_posimm8 : Operand<i32> { def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} def t2addrmode_negimm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemNegImm8OffsetAsmOperand; @@ -200,15 +199,22 @@ def t2addrmode_negimm8 : Operand<i32>, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -def t2addrmode_imm8 : Operand<i32>, - ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { - let PrintMethod = "printT2AddrModeImm8Operand"; +class T2AddrMode_Imm8 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; let ParserMatchClass = MemImm8OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8 : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand<false>"; +} + +def t2addrmode_imm8_pre : T2AddrMode_Imm8 { + let PrintMethod = "printT2AddrModeImm8Operand<true>"; +} + def t2am_imm8_offset : Operand<i32>, ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", [], [SDNPWantRoot]> { @@ -219,14 +225,21 @@ def t2am_imm8_offset : Operand<i32>, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -def t2addrmode_imm8s4 : Operand<i32> { - let PrintMethod = "printT2AddrModeImm8s4Operand"; +class T2AddrMode_Imm8s4 : Operand<i32> { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } +def t2addrmode_imm8s4 : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand<false>"; +} + +def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { + let PrintMethod = "printT2AddrModeImm8s4Operand<true>"; +} + def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } def t2am_imm8s4_offset : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; @@ -238,7 +251,8 @@ def t2am_imm8s4_offset : Operand<i32> { def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand<i32> { +def t2addrmode_imm0_1020s4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm0_1020s4"; @@ -959,6 +973,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{19-16} = addr{16-13}; // Rn let Inst{15-12} = Rt; let Inst{11-0} = addr{11-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii, opc, "\t$Rt, $addr", @@ -979,6 +995,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{9} = addr{8}; // U let Inst{8} = 0; // The W bit. let Inst{7-0} = addr{7-0}; // imm + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis, opc, ".w\t$Rt, $addr", @@ -1011,14 +1029,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; - let Inst{23} = ?; // add = (U == '1') let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{19-16} = 0b1111; // Rn + bits<4> Rt; - bits<12> addr; let Inst{15-12} = Rt{3-0}; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') let Inst{11-0} = addr{11-0}; + + let DecoderMethod = "DecodeT2LoadLabel"; } } @@ -1228,15 +1250,15 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPR, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPR, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPR, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - rGPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPR, UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword @@ -1294,12 +1316,9 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, - "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), @@ -1307,48 +1326,42 @@ def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, - "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>; + def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins t2addrmode_imm8:$addr), + (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", - []> { - let AsmMatchConverter = "cvtLdWriteBackRegT2AddrModeImm8"; -} + []>; + def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, @@ -1373,6 +1386,8 @@ class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. let Inst{7-0} = addr{7-0}; + + let DecoderMethod = "DecodeT2LoadT"; } def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; @@ -1399,27 +1414,22 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; + def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "strh", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins rGPR:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", - "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { - let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; -} + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1506,9 +1516,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // For disassembly only. def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), - (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_ru, + (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2LdrdPre"; let DecoderMethod = "DecodeT2LDRDPreInstruction"; } @@ -1518,10 +1527,9 @@ def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), "$addr.base = $wb", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), - (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { - let AsmMatchConverter = "cvtT2StrdPre"; let DecoderMethod = "DecodeT2STRDPreInstruction"; } @@ -1543,16 +1551,17 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; + let Inst{23} = 1; let Inst{22} = 0; let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; bits<17> addr; - let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn - let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadImm12"; } def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc, @@ -1571,6 +1580,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { bits<13> addr; let Inst{19-16} = addr{12-9}; // Rn let Inst{7-0} = addr{7-0}; // imm8 + + let DecoderMethod = "DecodeT2LoadImm8"; } def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, @@ -1584,7 +1595,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; + let Inst{11-6} = 0b000000; bits<10> addr; let Inst{19-16} = addr{9-6}; // Rn @@ -1593,15 +1604,33 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: We should have a separate 'pci' variant here. As-is we represent - // it via the i12 variant, which it's related to, but that means we can - // represent negative immediates, which aren't legal for anything except - // the 'pci' case (Rn == 15). } -defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; -defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; -defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; +defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; +defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; +defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; + +// pci variant is very similar to i12, but supports negative offsets +// from the PC. Only PLD and PLI have pci variants (not PLDW) +class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr), + IIC_Preload, opc, "\t$addr", + [(ARMPreload (ARMWrapper tconstpool:$addr), + (i32 0), (i32 inst))]>, Sched<[WritePreLd]> { + let Inst{31-25} = 0b1111100; + let Inst{24} = inst; + let Inst{22-20} = 0b001; + let Inst{19-16} = 0b1111; + let Inst{15-12} = 0b1111; + + bits<13> addr; + let Inst{23} = addr{12}; // add = (U == '1') + let Inst{11-0} = addr{11-0}; // imm12 + + let DecoderMethod = "DecodeT2LoadLabel"; +} + +def t2PLDpci : T2Iplpci<0, "pld">, Requires<[IsThumb2]>; +def t2PLIpci : T2Iplpci<1, "pli">, Requires<[IsThumb2,HasV7]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. @@ -2907,7 +2936,12 @@ def t2PKHTB : T2ThreeReg< // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. -def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)), + (t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>, Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), @@ -3092,26 +3126,24 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, - Requires<[IsThumb, HasDB]> { +def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; let Inst{3-0} = opt; } } -def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, - "dsb", "\t$opt", []>, - Requires<[IsThumb, HasDB]> { +def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, + "dsb", "\t$opt", []>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } -def t2ISB : AInoP<(outs), (ins instsyncb_opt:$opt), ThumbFrm, NoItinerary, - "isb", "\t$opt", - []>, Requires<[IsThumb, HasDB]> { +def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, + "isb", "\t$opt", []>, Requires<[HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; @@ -3154,13 +3186,16 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz, let mayLoad = 1 in { def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "ldrexb", "\t$Rt, $addr", "", []>; + "ldrexb", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "ldrexh", "\t$Rt, $addr", "", []>; + "ldrexh", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, - "ldrex", "\t$Rt, $addr", "", []> { + "ldrex", "\t$Rt, $addr", "", + [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3185,16 +3220,22 @@ let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexb", "\t$Rd, $Rt, $addr", "", []>; + "strexb", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_1 rGPR:$Rt, + addr_offset_none:$addr))]>; def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, - "strexh", "\t$Rd, $Rt, $addr", "", []>; + "strexh", "\t$Rd, $Rt, $addr", "", + [(set rGPR:$Rd, (strex_2 rGPR:$Rt, + addr_offset_none:$addr))]>; + def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), AddrModeNone, 4, NoItinerary, "strex", "\t$Rd, $Rt, $addr", "", - []> { + [(set rGPR:$Rd, (strex_4 rGPR:$Rt, + t2addrmode_imm0_1020s4:$addr))]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3216,7 +3257,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), } } -def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>, Requires<[IsThumb2, HasV7]> { let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; @@ -3227,6 +3268,15 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, let Inst{3-0} = 0b1111; } +def : T2Pat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (t2LDREXB addr_offset_none:$addr)>; +def : T2Pat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (t2LDREXH addr_offset_none:$addr)>; +def : T2Pat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (t2STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : T2Pat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (t2STREXH GPR:$Rt, addr_offset_none:$addr)>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return @@ -3549,6 +3599,16 @@ def t2RFEIA : T2RFE<0b111010011001, (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn", [/* For disassembly only; pattern left blank */]>; +// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction. +let Defs = [PC], Uses = [LR] in +def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, + "subs", "\tpc, lr, $imm", []>, Requires<[IsThumb2]> { + let Inst{31-8} = 0b111100111101111010001111; + + bits<8> imm; + let Inst{7-0} = imm; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -3632,7 +3692,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm> { let DecoderMethod = "DecodeCopMemInstruction"; } def _PRE : T2CI<op31_28, - (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), + (outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr), asm, "\t$cop, $CRd, $addr!"> { bits<13> addr; bits<4> cop; @@ -3783,8 +3843,7 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, list<dag> pattern> - : T2Cop<Op, oops, iops, - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", pattern> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3809,7 +3868,7 @@ class t2MovRRCopro<bits<4> Op, string opc, bit direction, list<dag> pattern = []> : T2Cop<Op, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { + opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{27-24} = 0b1100; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -3834,32 +3893,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; -def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", +def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm", (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, - c_imm:$CRm, 0)>; + c_imm:$CRm, 0, pred:$p)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, - (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", - (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; +def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, - (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; -def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", - (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, - c_imm:$CRm, 0)>; +def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3886,7 +3945,7 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>; def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; @@ -3909,7 +3968,7 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), - "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + "cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, imm:$CRm, imm:$opc2)]> { let Inst{27-24} = 0b1110; @@ -4097,9 +4156,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; // Memory barriers -def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>; -def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>; +def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. @@ -4350,7 +4409,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; // Version w/ the .w suffix. def : t2InstAlias<"ldr${p}.w $Rt, $addr", - (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>; def : t2InstAlias<"ldrb${p}.w $Rt, $addr", (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; def : t2InstAlias<"ldrh${p}.w $Rt, $addr", @@ -4362,3 +4421,10 @@ def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", def : t2InstAlias<"add${p} $Rd, pc, $imm", (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; + +// PLD/PLDW/PLI with alternate literal form. +def : t2InstAlias<"pld${p} $addr", + (t2PLDpci t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : InstAlias<"pli${p} $addr", + (t2PLIpci t2ldr_pcrel_imm12:$addr, pred:$p)>, + Requires<[IsThumb2,HasV7]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 597b74a..f9cfa15 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -333,6 +333,42 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +multiclass vsel_inst<string op, bits<2> opc> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), + []>, Requires<[HasV8FP]>; + + def D : ADbInp<0b11100, opc, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), + []>, Requires<[HasV8FP]>; + } +} + +defm VSELGT : vsel_inst<"gt", 0b11>; +defm VSELGE : vsel_inst<"ge", 0b10>; +defm VSELEQ : vsel_inst<"eq", 0b00>; +defm VSELVS : vsel_inst<"vs", 0b01>; + +multiclass vmaxmin_inst<string op, bit opc> { + let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def S : ASbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), + []>, Requires<[HasV8FP]>; + + def D : ADbInp<0b11101, 0b00, opc, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"), + []>, Requires<[HasV8FP]>; + } +} + +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>; +defm VMINNM : vmaxmin_inst<"vminnm", 1>; + // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; @@ -468,7 +504,7 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, let Inst{4} = 0; } -// Between half-precision and single-precision. For disassembly only. +// Between half, single and double-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), @@ -493,6 +529,111 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; +def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; +} + +def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm", + []>, Requires<[HasV8FP]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; +} + +multiclass vcvt_inst<string opc, bits<2> rm> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + + def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + + def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), + []>, Requires<[HasV8FP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + + def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins DPR:$Dm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), + []>, Requires<[HasV8FP]> { + bits<5> Dm; + + let Inst{17-16} = rm; + + // Encode instruction operands + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{8} = 1; + } + } +} + +defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTN : vcvt_inst<"n", 0b01>; +defm VCVTP : vcvt_inst<"p", 0b10>; +defm VCVTM : vcvt_inst<"m", 0b11>; + def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", @@ -507,6 +648,54 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +multiclass vrint_inst_zrx<string opc, bit op, bit op2> { + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", + []>, Requires<[HasV8FP]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", + []>, Requires<[HasV8FP]> { + let Inst{7} = op2; + let Inst{16} = op; + } +} + +defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0>; + +multiclass vrint_inst_anpm<string opc, bits<2> rm> { + let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), + []>, Requires<[HasV8FP]> { + let Inst{17-16} = rm; + } + } + + def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"), + (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>; + def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>; +} + +defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTN : vrint_inst_anpm<"n", 0b01>; +defm VRINTP : vrint_inst_anpm<"p", 0b10>; +defm VRINTM : vrint_inst_anpm<"m", 0b11>; + def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8ed576..1803a8a 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -109,12 +109,12 @@ namespace { unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges); + SmallVectorImpl<MachineBasicBlock::iterator> &Merges); void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, int Opcode, unsigned Size, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, - SmallVector<MachineBasicBlock::iterator, 4> &Merges); + SmallVectorImpl<MachineBasicBlock::iterator> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -371,7 +371,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { + SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; @@ -444,10 +444,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, /// load / store multiple instructions. void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; @@ -1484,7 +1484,7 @@ namespace { unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, - SmallVector<MachineInstr*, 4> &Ops, + SmallVectorImpl<MachineInstr *> &Ops, unsigned Base, bool isLd, DenseMap<MachineInstr*, unsigned> &MI2LocMap); bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); @@ -1602,8 +1602,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; // Make sure the base address satisfies i64 ld / st alignment requirement. + // At the moment, we ignore the memoryoperand's value. + // If we want to use AliasAnalysis, we should check it accordingly. if (!Op0->hasOneMemOperand() || - !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) return false; @@ -1655,7 +1656,7 @@ namespace { } bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, - SmallVector<MachineInstr*, 4> &Ops, + SmallVectorImpl<MachineInstr *> &Ops, unsigned Base, bool isLd, DenseMap<MachineInstr*, unsigned> &MI2LocMap) { bool RetVal = false; @@ -1857,9 +1858,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector<MachineInstr*, 4> MIs; - MIs.push_back(MI); - Base2LdsMap[Base] = MIs; + Base2LdsMap[Base].push_back(MI); LdBases.push_back(Base); } } else { @@ -1875,9 +1874,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!StopHere) BI->second.push_back(MI); } else { - SmallVector<MachineInstr*, 4> MIs; - MIs.push_back(MI); - Base2StsMap[Base] = MIs; + Base2StsMap[Base].push_back(MI); StBases.push_back(Base); } } @@ -1893,7 +1890,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule loads. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { unsigned Base = LdBases[i]; - SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; + SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; if (Lds.size() > 1) RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); } @@ -1901,7 +1898,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { // Re-schedule stores. for (unsigned i = 0, e = StBases.size(); i != e; ++i) { unsigned Base = StBases[i]; - SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; + SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; if (Sts.size() > 1) RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 0459d64..bb7d358 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -215,7 +215,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { // GPRs without the PC but with APSR. Some instructions allow accessing the // APSR, while actually encoding PC in the register field. This is usefull // for assembly and disassembly only. -def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add GPR, APSR_NZCV)> { +def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> { let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)]; let AltOrderSelect = [{ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only(); diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index d06ad7d..74ee50b 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1883,13 +1883,10 @@ def CortexA9Itineraries : ProcessorItineraries< // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 56; // Based on available renamed registers. let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. - let ILPWindow = 10; // Don't reschedule small blocks to hide - // latency. Minimum latency requirements are already - // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; @@ -1904,7 +1901,7 @@ def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; def A9UnitLS : ProcResource<1>; -def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index b5cf251..2a41616 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1076,7 +1076,7 @@ def SwiftItineraries : ProcessorItineraries< // Swift machine model for scheduling and other instruction cost heuristics. def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 4d204ce..0a0f30c 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -77,9 +77,11 @@ void ARMSubtarget::initializeEnvironment() { HasV6Ops = false; HasV6T2Ops = false; HasV7Ops = false; + HasV8Ops = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; + HasV8FP = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index bc5af96..ad7f1b3 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -37,7 +37,8 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; - /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops - + /// HasV4TOps, HasV5TOps, HasV5TEOps, + /// HasV6Ops, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. bool HasV4TOps; bool HasV5TOps; @@ -45,12 +46,14 @@ protected: bool HasV6Ops; bool HasV6T2Ops; bool HasV7Ops; + bool HasV8Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; + bool HasV8FP; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -192,10 +195,6 @@ protected: public: enum { - isELF, isDarwin - } TargetType; - - enum { ARM_ABI_APCS, ARM_ABI_AAPCS // ARM EABI } TargetABI; @@ -231,6 +230,7 @@ public: bool hasV6Ops() const { return HasV6Ops; } bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } + bool hasV8Ops() const { return HasV8Ops; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } @@ -246,6 +246,7 @@ public: bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } + bool hasV8FP() const { return HasV8FP; } bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -279,6 +280,14 @@ public: bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetELF() const { return !isTargetDarwin(); } + // ARM EABI is the bare-metal EABI described in ARM ABI documents and + // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. + // FIXME: Add a flag for bare-metal for that target and set Triple::EABI + // even for GNUEABI, so we can make a distinction here and still conform to + // the EABI on GNU (and Android) mode. This requires change in Clang, too. + bool isTargetAEABI() const { + return TargetTriple.getEnvironment() == Triple::EABI; + } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b16ab4b..e6dbcb6 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,7 +60,7 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. This // allows the ARM pass to delegate to the target independent layer when // appropriate. - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); PM.add(createARMTargetTransformInfoPass(this)); } @@ -150,7 +150,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) - addPass(createGlobalMergePass(TM->getTargetLowering())); + addPass(createGlobalMergePass(TM)); return false; } @@ -169,7 +169,7 @@ bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) addPass(createARMLoadStoreOptimizationPass(true)); - if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) addPass(createMLxExpansionPass()); // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be // enabled when NEON is available. diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 53ece66..34576ba 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -124,7 +124,7 @@ public: unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; - unsigned getAddressComputationCost(Type *Val) const; + unsigned getAddressComputationCost(Type *Val, bool IsComplex) const; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, @@ -411,12 +411,14 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(CondTy); EVT SelValTy = TLI->getValueType(ValTy); - int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, - array_lengthof(NEONVectorSelectTbl), - ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return NEONVectorSelectTbl[Idx].Cost; + if (SelCondTy.isSimple() && SelValTy.isSimple()) { + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + } std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; @@ -425,7 +427,16 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { +unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { + // Address computations in vectorized code with non-consecutive addresses will + // likely result in more instructions compared to scalar code where the + // computation can more often be merged into the index mode. The resulting + // extra micro-ops can significantly decrease throughput. + unsigned NumVectorInstToHideOverhead = 10; + + if (Ty->isVectorTy() && IsComplex) + return NumVectorInstToHideOverhead; + // In many cases the address computation is not merged into the instruction // addressing mode. return 1; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c59ca64..80e5c6e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -161,6 +161,9 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasV7Ops() const { return STI.getFeatureBits() & ARM::HasV7Ops; } + bool hasV8Ops() const { + return STI.getFeatureBits() & ARM::HasV8Ops; + } bool hasARM() const { return !(STI.getFeatureBits() & ARM::FeatureNoARM); } @@ -216,50 +219,17 @@ class ARMAsmParser : public MCTargetAsmParser { SMLoc &EndLoc); // Asm Match Converter Methods - void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtT2StrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtStrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtLdWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); void cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtVLDwbFixed(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtVLDwbRegister(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtVSTwbFixed(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtVSTwbRegister(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool shouldOmitCCOutOperand(StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool shouldOmitPredicateOperand(StringRef Mnemonic, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool isDeprecated(MCInst &Inst, StringRef &Info); public: enum ARMMatchResultTy { @@ -277,7 +247,7 @@ public: MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. - MRI = &getContext().getRegisterInfo(); + MRI = getContext().getRegisterInfo(); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -626,6 +596,40 @@ public: bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } bool isImm() const { return Kind == k_Immediate; } + // checks whether this operand is an unsigned offset which fits is a field + // of specified width and scaled by a specific number of bits + template<unsigned width, unsigned scale> + bool isUnsignedOffset() const { + if (!isImm()) return false; + if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) return true; + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Align = 1LL << scale; + int64_t Max = Align * ((1LL << width) - 1); + return ((Val % Align) == 0) && (Val >= 0) && (Val <= Max); + } + return false; + } + // checks whether this operand is a memory operand computed as an offset + // applied to PC. the offset may have 8 bits of magnitude and is represented + // with two bits of shift. textually it may be either [pc, #imm], #imm or + // relocable expression... + bool isThumbMemPC() const { + int64_t Val = 0; + if (isImm()) { + if (isa<MCSymbolRefExpr>(Imm.Val)) return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val); + if (!CE) return false; + Val = CE->getValue(); + } + else if (isMem()) { + if(!Memory.OffsetImm || Memory.OffsetRegNum) return false; + if(Memory.BaseRegNum != ARM::PC) return false; + Val = Memory.OffsetImm->getValue(); + } + else return false; + return ((Val % 4) == 0) && (Val >= -1020) && (Val <= 1020); + } bool isFPImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1704,6 +1708,37 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addUnsignedOffset_b8s2Operands(MCInst &Inst, unsigned N) const { + if(const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue() >> 2)); + return; + } + + const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); + assert(SR && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateExpr(SR)); + } + + void addThumbMemPCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + if (isImm()) { + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (CE) { + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + return; + } + + const MCSymbolRefExpr *SR = dyn_cast<MCSymbolRefExpr>(Imm.Val); + assert(SR && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateExpr(SR)); + return; + } + + assert(isMem() && "Unknown value type!"); + assert(isa<MCConstantExpr>(Memory.OffsetImm) && "Unknown value type!"); + Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -2278,21 +2313,24 @@ public: } static ARMOperand * - CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, + CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs, SMLoc StartLoc, SMLoc EndLoc) { + assert (Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; - if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().first)) + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second)) Kind = k_DPRRegisterList; else if (ARMMCRegisterClasses[ARM::SPRRegClassID]. - contains(Regs.front().first)) + contains(Regs.front().second)) Kind = k_SPRRegisterList; + // Sort based on the register encoding values. + array_pod_sort(Regs.begin(), Regs.end()); + ARMOperand *Op = new ARMOperand(Kind); - for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator + for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) - Op->Registers.push_back(I->first); - array_pod_sort(Op->Registers.begin(), Op->Registers.end()); + Op->Registers.push_back(I->second); Op->StartLoc = StartLoc; Op->EndLoc = EndLoc; return Op; @@ -2972,12 +3010,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The reglist instructions have at most 16 registers, so reserve // space for that many. - SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; + int EReg = 0; + SmallVector<std::pair<unsigned, unsigned>, 16> Registers; // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); ++Reg; } const MCRegisterClass *RC; @@ -2991,7 +3031,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(RegLoc, "invalid register in register list"); // Store the register. - Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); // This starts immediately after the first register token in the list, // so we can see either a comma or a minus (range separator) as a legal @@ -3021,7 +3062,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Add all the registers in the range to the register list. while (Reg != EndReg) { Reg = getNextRegister(Reg); - Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); } continue; } @@ -3054,14 +3096,15 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; } // VFP register lists must also be contiguous. - // It's OK to use the enumeration values directly here rather, as the - // VFP register classes have the enum sorted properly. if (RC != &ARMMCRegisterClasses[ARM::GPRRegClassID] && Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); - Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); - if (isQReg) - Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc)); + EReg = MRI->getEncodingValue(Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); + if (isQReg) { + EReg = MRI->getEncodingValue(++Reg); + Registers.push_back(std::pair<unsigned, unsigned>(EReg, Reg)); + } } if (Parser.getTok().isNot(AsmToken::RCurly)) @@ -4039,260 +4082,9 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -/// cvtT2LdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2LdrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateReg(0)); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtT2StrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtT2StrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateReg(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addMemImm8s4OffsetOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegT2AddrModeImm8 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegT2AddrModeImm8(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm8OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - - -/// cvtStWriteBackRegAddrModeImm12 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrModeImm12(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addMemImm12OffsetOperands(Inst, 2); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode2(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode2Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackImm - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStExtTWriteBackImm(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxImm8Operands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStExtTWriteBackReg - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStExtTWriteBackReg(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[3])->addMemNoOffsetOperands(Inst, 1); - // offset - ((ARMOperand*)Operands[4])->addPostIdxRegOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtStrdPre - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtStrdPre(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Rt, Rt2 - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - // addr - ((ARMOperand*)Operands[4])->addAddrMode3Operands(Inst, 3); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. -void ARMAsmParser:: -cvtLdWriteBackRegAddrMode3(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - ((ARMOperand*)Operands[3])->addAddrMode3Operands(Inst, 3); - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -/// cvtThumbMultiply - Convert parsed operands to MCInst. -/// Needed here because the Asm Gen Matcher can't handle properly tied operands -/// when they refer multiple MIOperands inside a single one. +/// Convert parsed operands to MCInst. Needed here because this instruction +/// only has two register operands, but multiplication is commutative so +/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN". void ARMAsmParser:: cvtThumbMultiply(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -4310,62 +4102,6 @@ cvtThumbMultiply(MCInst &Inst, ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); } -void ARMAsmParser:: -cvtVLDwbFixed(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Vd - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVLDwbRegister(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Vd - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vm - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVSTwbFixed(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vt - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - -void ARMAsmParser:: -cvtVSTwbRegister(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // Create a writeback register dummy placeholder. - Inst.addOperand(MCOperand::CreateImm(0)); - // Vn - ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); - // Vm - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - // Vt - ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); - // pred - ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); -} - /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser:: @@ -4869,7 +4605,10 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || - Mnemonic == "fmuls") + Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || + Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || + Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || + Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -4966,28 +4705,30 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, } else CanAcceptCarrySet = false; - if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || - Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || - Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || - Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || - Mnemonic == "dsb" || Mnemonic == "isb" || Mnemonic == "setend" || - (Mnemonic == "clrex" && !isThumb()) || - (Mnemonic == "nop" && isThumbOne()) || - ((Mnemonic == "pld" || Mnemonic == "pli" || Mnemonic == "pldw" || - Mnemonic == "ldc2" || Mnemonic == "ldc2l" || - Mnemonic == "stc2" || Mnemonic == "stc2l") && !isThumb()) || - ((Mnemonic.startswith("rfe") || Mnemonic.startswith("srs")) && - !isThumb()) || - Mnemonic.startswith("cps") || (Mnemonic == "movs" && isThumbOne())) { + if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "trap" || Mnemonic == "setend" || + Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || + Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || + Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || + Mnemonic == "vrintm") { + // These mnemonics are never predicable CanAcceptPredicationCode = false; + } else if (!isThumb()) { + // Some instructions are only predicable in Thumb mode + CanAcceptPredicationCode + = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && + Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && + Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && + Mnemonic != "stc2" && Mnemonic != "stc2l" && + !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + } else if (isThumbOne()) { + CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs"; } else CanAcceptPredicationCode = true; - - if (isThumb()) { - if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || - Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") - CanAcceptPredicationCode = false; - } } bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, @@ -5042,15 +4783,6 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[5])->isImm()) { // Nest conditions rather than one big 'if' statement for readability. // - // If either register is a high reg, it's either one of the SP - // variants (handled above) or a 32-bit encoding, so we just - // check against T3. If the second register is the PC, this is an - // alternate form of ADR, which uses encoding T4, so check for that too. - if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && - static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && - static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) - return false; // If both registers are low, we're in an IT block, and the immediate is // in range, we should use encoding T1 instead, which has a cc_out. if (inITBlock() && @@ -5058,6 +4790,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) && static_cast<ARMOperand*>(Operands[5])->isImm0_7()) return false; + // Check against T3. If the second register is the PC, this is an + // alternate form of ADR, which uses encoding T4, so check for that too. + if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && + static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) + return false; // Otherwise, we use encoding T4, which does not have a cc_out // operand. @@ -5120,6 +4857,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, return false; } +bool ARMAsmParser::shouldOmitPredicateOperand( + StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) { + // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON + unsigned RegIdx = 3; + if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && + static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") { + if (static_cast<ARMOperand *>(Operands[3])->isToken() && + static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32") + RegIdx = 4; + + if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() && + (ARMMCRegisterClasses[ARM::DPRRegClassID] + .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) || + ARMMCRegisterClasses[ARM::QPRRegClassID] + .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()))) + return true; + } + return false; +} + +bool ARMAsmParser::isDeprecated(MCInst &Inst, StringRef &Info) { + if (hasV8Ops() && Inst.getOpcode() == ARM::SETEND) { + Info = "armv8"; + return true; + } + return false; +} + static bool isDataTypeToken(StringRef Tok) { return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || @@ -5266,7 +5031,17 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken)) continue; - if (ExtraToken != ".n") { + // For for ARM mode generate an error if the .n qualifier is used. + if (ExtraToken == ".n" && !isThumb()) { + SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); + return Error(Loc, "instruction with .n (narrow) qualifier not allowed in " + "arm mode"); + } + + // The .n qualifier is always discarded as that is what the tables + // and matcher expect. In ARM mode the .w qualifier has no effect, + // so discard it to avoid errors that can be caused by the matcher. + if (ExtraToken != ".n" && (isThumb() || ExtraToken != ".w")) { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); } @@ -5312,6 +5087,15 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } + // Some instructions have the same mnemonic, but don't always + // have a predicate. Distinguish them here and delete the + // predicate if needed. + if (shouldOmitPredicateOperand(Mnemonic, Operands)) { + ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op; + } + // ARM mode 'blx' need special handling, as the register operand version // is predicable, but the label operand version is not. So, we can't rely // on the Mnemonic based checking to correctly figure out when to put @@ -5363,6 +5147,26 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + // FIXME: As said above, this is all a pretty gross hack. This instruction + // does not fit with other "subs" and tblgen. + // Adjust operands of B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction + // so the Mnemonic is the original name "subs" and delete the predicate + // operand so it will match the table entry. + if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC && + static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR && + static_cast<ARMOperand*>(Operands[5])->isImm()) { + ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]); + Operands.erase(Operands.begin()); + delete Op0; + Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc)); + + ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]); + Operands.erase(Operands.begin() + 1); + delete Op1; + } return false; } @@ -5581,6 +5385,10 @@ validateInstruction(MCInst &Inst, } } + StringRef DepInfo; + if (isDeprecated(Inst, DepInfo)) + Warning(Loc, "deprecated on " + DepInfo); + return false; } @@ -5862,7 +5670,9 @@ processInstruction(MCInst &Inst, case ARM::t2LDRpcrel: // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && - Inst.getOperand(1).getImm() <= 0xff) + Inst.getOperand(1).getImm() <= 0xff && + !(static_cast<ARMOperand*>(Operands[2])->isToken() && + static_cast<ARMOperand*>(Operands[2])->getToken() == ".w")) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -7851,8 +7661,8 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) { /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI.hasSubsectionsViaSymbols(); + const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); + bool isMachO = MAI->hasSubsectionsViaSymbols(); StringRef Name; bool needFuncName = true; @@ -8199,11 +8009,19 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { if (HandlerDataLoc.isValid()) return Error(L, ".save or .vsave must precede .handlerdata directive"); + // RAII object to make sure parsed operands are deleted. + struct CleanupObject { + SmallVector<MCParsedAsmOperand *, 1> Operands; + ~CleanupObject() { + for (unsigned I = 0, E = Operands.size(); I != E; ++I) + delete Operands[I]; + } + } CO; + // Parse the register list - SmallVector<MCParsedAsmOperand*, 1> Operands; - if (parseRegisterList(Operands)) + if (parseRegisterList(CO.Operands)) return true; - ARMOperand *Op = (ARMOperand*)Operands[0]; + ARMOperand *Op = (ARMOperand*)CO.Operands[0]; if (!IsVector && !Op->isRegList()) return Error(L, ".save expects GPR registers"); if (IsVector && !Op->isDPRRegList()) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index b832508..f271a93 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -49,7 +49,7 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) -add_dependencies(LLVMARMCodeGen intrinsics_gen) +add_dependencies(LLVMARMCodeGen ARMCommonTableGen intrinsics_gen) # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index a6eab33..8a06664 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -347,6 +347,14 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder); static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, @@ -448,6 +456,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, this, STI); if (result != MCDisassembler::Fail) { @@ -484,7 +499,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); + result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -746,23 +768,34 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return result; } - MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - UpdateThumbVFPPredicate(MI); - return result; + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + UpdateThumbVFPPredicate(MI); + return result; + } } MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); + result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); return result; } + if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + MI.clear(); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + Check(result, AddThumbPredicate(MI)); + return result; + } + } + if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { MI.clear(); uint32_t NEONLdStInsn = insn32; @@ -792,6 +825,17 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + MI.clear(); + uint32_t NEONv8Insn = insn32; + NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 + result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + + MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -908,8 +952,11 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + if (RegNo == 13 || RegNo == 15) + S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + return S; } static const uint16_t SPRDecoderTable[] = { @@ -2104,7 +2151,7 @@ DecodeT2BInstruction(MCInst &Inst, unsigned Insn, unsigned imm10 = fieldFromInstruction(Insn, 16, 10); unsigned imm11 = fieldFromInstruction(Insn, 0, 11); unsigned tmp = (S << 23) | (I1 << 22) | (I2 << 21) | (imm10 << 11) | imm11; - int imm32 = SignExtend32<24>(tmp << 1); + int imm32 = SignExtend32<25>(tmp << 1); if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(imm32)); @@ -3164,6 +3211,17 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, unsigned Rm = fieldFromInstruction(Val, 2, 4); unsigned imm = fieldFromInstruction(Val, 0, 2); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRHs: + case ARM::t2STRBs: + case ARM::t2STRs: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -3177,53 +3235,282 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBs: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHs: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHs: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRs: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2PLDs: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIs: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHs: + return MCDisassembler::Fail; + case ARM::t2LDRHs: + // FIXME: this instruction is only available with MP extensions, + // this should be checked first but we don't have access to the + // feature bits here. + Inst.setOpcode(ARM::t2PLDWs); + break; + default: + break; + } + } + switch (Inst.getOpcode()) { case ARM::t2PLDs: case ARM::t2PLDWs: case ARM::t2PLIs: break; - default: { - unsigned Rt = fieldFromInstruction(Insn, 12, 4); - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + unsigned addrmode = fieldFromInstruction(Insn, 4, 2); + addrmode |= fieldFromInstruction(Insn, 0, 4) << 2; + addrmode |= fieldFromInstruction(Insn, 16, 4) << 6; + if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 9, 1); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (U << 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRi8: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBi8: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRHi8: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi8: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2PLDi8: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi8: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi8: + return MCDisassembler::Fail; + default: + break; } } + switch (Inst.getOpcode()) { + case ARM::t2PLDi8: + case ARM::t2PLIi8: + case ARM::t2PLDWi8: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); - if (Rn == 0xF) { + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= (Rn << 13); + + if (Rn == 15) { switch (Inst.getOpcode()) { - case ARM::t2LDRBs: - Inst.setOpcode(ARM::t2LDRBpci); - break; - case ARM::t2LDRHs: - Inst.setOpcode(ARM::t2LDRHpci); - break; - case ARM::t2LDRSHs: - Inst.setOpcode(ARM::t2LDRSHpci); - break; - case ARM::t2LDRSBs: - Inst.setOpcode(ARM::t2LDRSBpci); + case ARM::t2LDRi12: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSHi12: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + case ARM::t2LDRBi12: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2PLDi12: + Inst.setOpcode(ARM::t2PLDpci); + break; + case ARM::t2PLIi12: + Inst.setOpcode(ARM::t2PLIpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRSHi12: + return MCDisassembler::Fail; + case ARM::t2LDRHi12: + Inst.setOpcode(ARM::t2PLDi12); + break; + default: + break; + } + } + + switch (Inst.getOpcode()) { + case ARM::t2PLDi12: + case ARM::t2PLDWi12: + case ARM::t2PLIi12: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + } + + if (!Check(S, DecodeT2AddrModeImm12(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadT(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + imm |= (Rn << 9); + + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRT: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRBT: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRHT: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSBT: + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSHT: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + + if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeT2AddrModeImm8(Inst, imm, Address, Decoder))) + return MCDisassembler::Fail; + return S; +} + +static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, + uint64_t Address, const void* Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned U = fieldFromInstruction(Insn, 23, 1); + int imm = fieldFromInstruction(Insn, 0, 12); + + if (Rt == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDRBpci: + case ARM::t2LDRHpci: + Inst.setOpcode(ARM::t2PLDpci); break; - case ARM::t2PLDs: - Inst.setOpcode(ARM::t2PLDi12); - Inst.addOperand(MCOperand::CreateReg(ARM::PC)); + case ARM::t2LDRSBpci: + Inst.setOpcode(ARM::t2PLIpci); break; - default: + case ARM::t2LDRSHpci: return MCDisassembler::Fail; + default: + break; } + } - int imm = fieldFromInstruction(Insn, 0, 12); - if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm)); - - return S; + switch(Inst.getOpcode()) { + case ARM::t2PLDpci: + case ARM::t2PLIpci: + break; + default: + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; } - unsigned addrmode = fieldFromInstruction(Insn, 4, 2); - addrmode |= fieldFromInstruction(Insn, 0, 4) << 2; - addrmode |= fieldFromInstruction(Insn, 16, 4) << 6; - if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) - return MCDisassembler::Fail; + if (!U) { + // Special case for #-0. + if (imm == 0) + imm = INT32_MIN; + else + imm = -imm; + } + Inst.addOperand(MCOperand::CreateImm(imm)); return S; } @@ -3292,6 +3579,21 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 9, 4); unsigned imm = fieldFromInstruction(Val, 0, 9); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRT: + case ARM::t2STRBT: + case ARM::t2STRHT: + case ARM::t2STRi8: + case ARM::t2STRHi8: + case ARM::t2STRBi8: + if (Rn == 15) + return MCDisassembler::Fail; + break; + default: + break; + } + // Some instructions always use an additive offset. switch (Inst.getOpcode()) { case ARM::t2LDRT: @@ -3327,6 +3629,37 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, addr |= Rn << 9; unsigned load = fieldFromInstruction(Insn, 20, 1); + if (Rn == 15) { + switch (Inst.getOpcode()) { + case ARM::t2LDR_PRE: + case ARM::t2LDR_POST: + Inst.setOpcode(ARM::t2LDRpci); + break; + case ARM::t2LDRB_PRE: + case ARM::t2LDRB_POST: + Inst.setOpcode(ARM::t2LDRBpci); + break; + case ARM::t2LDRH_PRE: + case ARM::t2LDRH_POST: + Inst.setOpcode(ARM::t2LDRHpci); + break; + case ARM::t2LDRSB_PRE: + case ARM::t2LDRSB_POST: + if (Rt == 15) + Inst.setOpcode(ARM::t2PLIpci); + else + Inst.setOpcode(ARM::t2LDRSBpci); + break; + case ARM::t2LDRSH_PRE: + case ARM::t2LDRSH_POST: + Inst.setOpcode(ARM::t2LDRSHpci); + break; + default: + return MCDisassembler::Fail; + } + return DecodeT2LoadLabel(Inst, Insn, Address, Decoder); + } + if (!load) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3353,6 +3686,17 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, unsigned Rn = fieldFromInstruction(Val, 13, 4); unsigned imm = fieldFromInstruction(Val, 0, 12); + // Thumb stores cannot use PC as dest register. + switch (Inst.getOpcode()) { + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + if (Rn == 15) + return MCDisassembler::Fail; + default: + break; + } + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(imm)); @@ -4364,10 +4708,8 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, S = MCDisassembler::SoftFail; } - if (mask == 0x0) { - mask |= 0x8; - S = MCDisassembler::SoftFail; - } + if (mask == 0x0) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(pred)); Inst.addOperand(MCOperand::CreateImm(mask)); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 7fef795..97da232 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -243,15 +243,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - // Thumb1 NOP - if (Opcode == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::R8 && - MI->getOperand(1).getReg() == ARM::R8) { - O << "\tnop"; - printPredicateOperand(MI, 2, O); - printAnnotation(O, Annot); - return; - } - // Combine 2 GPRs from disassember into a GPRPair to match with instr def. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, // a single GPRPair reg operand is used in the .td file to replace the two @@ -315,15 +306,29 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - if (MO1.isExpr()) + if (MO1.isExpr()) { O << *MO1.getExpr(); - else if (MO1.isImm()) { - O << markup("<mem:") << "[pc, " - << markup("<imm:") << "#" << formatImm(MO1.getImm()) - << markup(">]>", "]"); + return; } - else - llvm_unreachable("Unknown LDR label operand?"); + + O << markup("<mem:") << "[pc, "; + + int32_t OffImm = (int32_t)MO1.getImm(); + bool isSub = OffImm < 0; + + // Special value for #-0. All others are normal. + if (OffImm == INT32_MIN) + OffImm = 0; + if (isSub) { + O << markup("<imm:") + << "#-" << formatImm(-OffImm) + << markup(">"); + } else { + O << markup("<imm:") + << "#" << formatImm(OffImm) + << markup(">"); + } + O << "]" << markup(">"); } // so_reg is a 4-operand unit corresponding to register forms of the A5.1 @@ -895,6 +900,7 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +template<unsigned scale> void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); @@ -904,7 +910,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, return; } - int32_t OffImm = (int32_t)MO.getImm(); + int32_t OffImm = (int32_t)MO.getImm() << scale; O << markup("<imm:"); if (OffImm == INT32_MIN) @@ -1065,6 +1071,7 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template<bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1075,22 +1082,25 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; if (OffImm == INT32_MIN) - O << "#-0"; - else if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } O << "]" << markup(">"); } +template<bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -1106,22 +1116,24 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, printRegName(O, MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; if (OffImm == INT32_MIN) - O << "#-0"; - else if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + OffImm = 0; + if (isSub) { + O << ", " + << markup("<imm:") + << "#-" << -OffImm + << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " + << markup("<imm:") + << "#" << OffImm + << markup(">"); + } O << "]" << markup(">"); } @@ -1148,7 +1160,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup("<imm:"); - if (OffImm < 0) + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) O << "#-" << -OffImm; else O << "#" << OffImm; @@ -1163,19 +1177,14 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); - // Don't print +0. - if (OffImm != 0) - O << ", "; - if (OffImm != 0 && UseMarkup) - O << "<imm:"; + O << ", " << markup("<imm:"); if (OffImm == INT32_MIN) O << "#-0"; else if (OffImm < 0) O << "#-" << -OffImm; - else if (OffImm > 0) + else O << "#" << OffImm; - if (OffImm != 0 && UseMarkup) - O << ">"; + O << markup(">"); } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 5a64348..15ae8d1 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -76,6 +76,7 @@ public: void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <unsigned scale> void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -97,8 +98,10 @@ public: template<bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template<bool AlwaysPrintImm0> void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 8baa3a6..b1e25d8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -152,7 +152,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { default: return Op; case ARM::tBcc: return ARM::t2Bcc; - case ARM::tLDRpciASM: return ARM::t2LDRpci; + case ARM::tLDRpci: return ARM::t2LDRpci; case ARM::tADR: return ARM::t2ADR; case ARM::tB: return ARM::t2B; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 679d3c4..6b98205 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -109,18 +109,17 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + virtual void EmitBytes(StringRef Data) { EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data, AddrSpace); + MCELFStreamer::EmitBytes(Data); } /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCELFStreamer::EmitValueImpl(Value, Size); } virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -204,7 +203,7 @@ private: void EmitPersonalityFixup(StringRef Name); void FlushPendingOffset(); - void FlushUnwindOpcodes(bool AllowCompactModel0); + void FlushUnwindOpcodes(bool NoHandlerData); void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, SectionKind Kind, const MCSymbol &Fn); @@ -336,17 +335,17 @@ void ARMELFStreamer::EmitFnEnd() { MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(FnStartRef, 4, 0); + EmitValue(FnStartRef, 4); if (CantUnwind) { - EmitIntValue(EXIDX_CANTUNWIND, 4, 0); + EmitIntValue(EXIDX_CANTUNWIND, 4); } else if (ExTab) { // Emit a reference to the unwind opcodes in the ".ARM.extab" section. const MCSymbolRefExpr *ExTabEntryRef = MCSymbolRefExpr::Create(ExTab, MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(ExTabEntryRef, 4, 0); + EmitValue(ExTabEntryRef, 4); } else { // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in // the second word of exception index table entry. The size of the unwind @@ -356,7 +355,7 @@ void ARMELFStreamer::EmitFnEnd() { assert(Opcodes.size() == 4u && "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()), - Opcodes.size()), 0); + Opcodes.size())); } // Switch to the section containing FnStart @@ -377,13 +376,13 @@ void ARMELFStreamer::FlushPendingOffset() { } } -void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { +void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { // Emit the unwind opcode to restore $sp. if (UsedFP) { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); int64_t LastRegSaveSPOffset = SPOffset - PendingOffset; UnwindOpAsm.EmitSPOffset(LastRegSaveSPOffset - FPOffset); - UnwindOpAsm.EmitSetSP(MRI.getEncodingValue(FPReg)); + UnwindOpAsm.EmitSetSP(MRI->getEncodingValue(FPReg)); } else { FlushPendingOffset(); } @@ -394,7 +393,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { // For compact model 0, we have to emit the unwind opcodes in the .ARM.exidx // section. Thus, we don't have to create an entry in the .ARM.extab // section. - if (AllowCompactModel0 && PersonalityIndex == AEABI_UNWIND_CPP_PR0) + if (NoHandlerData && PersonalityIndex == AEABI_UNWIND_CPP_PR0) return; // Switch to .ARM.extab section. @@ -412,12 +411,22 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool AllowCompactModel0) { MCSymbolRefExpr::VK_ARM_PREL31, getContext()); - EmitValue(PersonalityRef, 4, 0); + EmitValue(PersonalityRef, 4); } // Emit unwind opcodes EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), - Opcodes.size()), 0); + Opcodes.size())); + + // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or + // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted + // after the unwind opcodes. The handler data consists of several 32-bit + // words, and should be terminated by zero. + // + // In case that the .handlerdata directive is not specified by the + // programmer, we should emit zero to terminate the handler data. + if (NoHandlerData && !Personality) + EmitIntValue(0, 4); } void ARMELFStreamer::EmitHandlerData() { @@ -458,9 +467,9 @@ void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, // Collect the registers in the register list unsigned Count = 0; uint32_t Mask = 0; - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); for (size_t i = 0; i < RegList.size(); ++i) { - unsigned Reg = MRI.getEncodingValue(RegList[i]); + unsigned Reg = MRI->getEncodingValue(RegList[i]); assert(Reg < (IsVector ? 32U : 16U) && "Register out of range"); unsigned Bit = (1u << Reg); if ((Mask & Bit) == 0) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 2aa1010..a18d465 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -315,6 +315,8 @@ public: unsigned EncodedValue) const; unsigned NEONThumb2DupPostEncoder(const MCInst &MI, unsigned EncodedValue) const; + unsigned NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const; unsigned VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const; @@ -389,6 +391,17 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI, return EncodedValue; } +/// Post-process encoded NEON v8 instructions, and rewrite them to Thumb2 form +/// if we are in Thumb2. +unsigned ARMMCCodeEmitter::NEONThumb2V8PostEncoder(const MCInst &MI, + unsigned EncodedValue) const { + if (isThumb2()) { + EncodedValue |= 0xC000000; // Set bits 27-26 + } + + return EncodedValue; +} + /// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite /// them to their Thumb2 form if we are currently in Thumb2 mode. unsigned ARMMCCodeEmitter:: @@ -407,7 +420,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -436,7 +449,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -724,8 +737,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -741,12 +754,12 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; - isAdd = false ; // 'U' bit is set as part of the fixup. if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); + isAdd = false ; // 'U' bit is set as part of the fixup. MCFixupKind Kind; if (isThumb2()) @@ -821,7 +834,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -857,7 +870,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -940,8 +953,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); - unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -975,7 +988,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -998,7 +1011,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -1011,7 +1024,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t ARMMCCodeEmitter:: @@ -1029,7 +1042,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1047,7 +1060,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1057,14 +1070,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + Imm8 = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1092,7 +1105,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1119,7 +1132,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1165,7 +1178,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1190,7 +1203,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo()->getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1209,7 +1222,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1248,9 +1261,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. - unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); + Value |= CTX.getRegisterInfo()->getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1264,7 +1277,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo()->getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1326,7 +1339,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1382,7 +1395,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1391,7 +1404,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1407,7 +1420,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1430,7 +1443,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1456,7 +1469,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1475,7 +1488,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + return CTX.getRegisterInfo()->getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 14fd03f..caa1949 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -59,7 +59,10 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { std::string ARMArchFeature; if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer >= '7' && SubVer <= '9') { + if (SubVer == '8') { + // FIXME: Parse v8 features + ARMArchFeature = "+v8"; + } else if (SubVer == '7') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { isThumb = true; if (NoCPU) |
