author | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
committer | Stephen Hines <srhines@google.com> | 2014-05-29 02:49:00 -0700
commit | dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree | dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/ARM
parent | 220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/ARM')
67 files changed, 3197 insertions(+), 1944 deletions(-)
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 28ea879..94faf6f 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "a15-sd-optimizer" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" @@ -39,6 +38,8 @@ using namespace llvm; +#define DEBUG_TYPE "a15-sd-optimizer" + namespace { struct A15SDOptimizer : public MachineFunctionPass { static char ID; @@ -90,7 +91,7 @@ namespace { unsigned createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, DebugLoc DL); - + // // Various property checkers // @@ -259,7 +260,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (DPRMI && SPRMI) { // See if the first operand of this insert_subreg is IMPLICIT_DEF MachineInstr *ECDef = elideCopies(DPRMI); - if (ECDef != 0 && ECDef->isImplicitDef()) { + if (ECDef && ECDef->isImplicitDef()) { // Another corner case - if we're inserting something that is purely // a subreg copy of a DPR, just use that DPR. @@ -348,10 +349,10 @@ MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { if (!MI->isFullCopy()) return MI; if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) - return NULL; + return nullptr; MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!Def) - return NULL; + return nullptr; return elideCopies(Def); } @@ -435,7 +436,7 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, Out) .addReg(Reg) .addImm(Lane)); - + return Out; } @@ -601,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR // lane, and the other lane(s) of the DPR/QPR register // that we are inserting in are undefined, use the - // original DPR/QPR value. + // original DPR/QPR value. // * Otherwise, fall back on the same stategy as COPY. // // * REG_SEQUENCE: * If all except one of the input operands are @@ -693,7 +694,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { MI != ME;) { Modified |= runOnInstruction(MI++); } - + } for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 4412b45..55df29c 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -49,8 +49,6 @@ FunctionPass *createThumb2SizeReductionPass(); /// \brief Creates an ARM-specific Target Transformation Info pass. 
ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); -FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM); - void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 0fa865f..55e9fe5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMAsmPrinter.h" #include "ARM.h" #include "ARMConstantPoolValue.h" @@ -45,6 +44,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -55,6 +55,8 @@ #include <cctype> using namespace llvm; +#define DEBUG_TYPE "asm-printer" + void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end // of the function. @@ -85,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { ? MCSymbolRefExpr::VK_ARM_TARGET1 : MCSymbolRefExpr::VK_None), OutContext); - + OutStreamer.EmitValue(E, Size); } @@ -96,7 +98,28 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); - return AsmPrinter::runOnMachineFunction(MF); + SetupMachineFunction(MF); + + if (Subtarget->isTargetCOFF()) { + bool Internal = MF.getFunction()->hasInternalLinkage(); + COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL; + int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; + + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Scl); + OutStreamer.EmitCOFFSymbolType(Type); + OutStreamer.EndCOFFSymbolDef(); + } + + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); + + // Emit the rest of the function body. + EmitFunctionBody(); + + // We didn't modify anything. + return false; } void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, @@ -239,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ARM::GPRPairRegClass.contains(RegBegin)) { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); - O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); } O << ARMInstPrinter::getRegisterName(RegBegin); @@ -383,7 +406,7 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, // If either end mode is unknown (EndInfo == NULL) or different than // the start mode, then restore the start mode. const bool WasThumb = isThumb(StartInfo); - if (EndInfo == NULL || WasThumb != isThumb(*EndInfo)) { + if (!EndInfo || WasThumb != isThumb(*EndInfo)) { OutStreamer.EmitAssemblerFlag(WasThumb ? 
MCAF_Code16 : MCAF_Code32); } } @@ -456,6 +479,29 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { emitAttributes(); } +static void +emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, + MachineModuleInfoImpl::StubValueTy &MCSym) { + // L_foo$stub: + OutStreamer.EmitLabel(StubLabel); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, 4/*size*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. + OutStreamer.EmitValue( + MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + 4 /*size*/); +} + void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetMachO()) { @@ -472,27 +518,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers need to be indirect and pc-rel. We accomplish this by - // using NLPs; however, sometimes the types are local to the file. - // We need to fill in the value for the NLP in those cases. - OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -500,17 +528,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetHiddenGVStubList(); if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -523,6 +545,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // Emit a .data.rel section containing any stubs that were created. + if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. 
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (auto &stub: Stubs) { + OutStreamer.EmitLabel(stub.first); + OutStreamer.EmitSymbolValue(stub.second.getPointer(), + TD->getPointerSize(0)); + } + Stubs.clear(); + } + } } //===----------------------------------------------------------------------===// @@ -575,7 +619,7 @@ void ARMAsmPrinter::emitAttributes() { getArchForCPU(CPUString, Subtarget)); // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". + // profile is not applicable (e.g. pre v7, or cross-profile code)". if (Subtarget->hasV7Ops()) { if (Subtarget->isAClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, @@ -627,6 +671,20 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitFPU(ARM::VFPV2); } + if (TM.getRelocationModel() == Reloc::PIC_) { + // PIC specific attributes. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data, + ARMBuildAttrs::AddressRWPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data, + ARMBuildAttrs::AddressROPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressGOT); + } else { + // Allow direct addressing of imported data for all other relocation models. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressDirect); + } + // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); @@ -723,7 +781,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) : MMIMachO.getGVStubEntry(MCSym); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); return MCSym; @@ -971,7 +1029,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { RegList.push_back(SrcReg); break; } - ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); } else { // Changes of stack / frame pointer. if (SrcReg == ARM::SP) { @@ -1016,18 +1075,20 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } - if (DstReg == FramePtr && FramePtr != ARM::SP) - // Set-up of the frame pointer. Positive values correspond to "add" - // instruction. - ATS.emitSetFP(FramePtr, ARM::SP, -Offset); - else if (DstReg == ARM::SP) { - // Change of SP by an offset. Positive values correspond to "sub" - // instruction. - ATS.emitPad(Offset); - } else { - // Move of SP to a register. Positive values correspond to an "add" - // instruction. - ATS.emitMovSP(DstReg, -Offset); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + ATS.emitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. + ATS.emitPad(Offset); + } else { + // Move of SP to a register. Positive values correspond to an "add" + // instruction. 
+ ATS.emitMovSP(DstReg, -Offset); + } } } else if (DstReg == ARM::SP) { MI->dump(); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 46c2626..7c103c6 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -47,16 +47,17 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { bool InConstantPool; public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - } + : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), + InConstantPool(false) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + } const char *getPassName() const override { return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp deleted file mode 100644 index 18e0783..0000000 --- a/lib/Target/ARM/ARMAtomicExpandPass.cpp +++ /dev/null @@ -1,406 +0,0 @@ -//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-atomic-expand" -#include "ARM.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -namespace { - class ARMAtomicExpandPass : public FunctionPass { - const TargetLowering *TLI; - public: - static char ID; // Pass identification, replacement for typeid - explicit ARMAtomicExpandPass(const TargetMachine *TM = 0) - : FunctionPass(ID), TLI(TM->getTargetLowering()) {} - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool expandAtomicLoad(LoadInst *LI); - bool expandAtomicStore(StoreInst *LI); - bool expandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - - /// Perform a load-linked operation on Addr, returning a "Value *" with the - /// corresponding pointee type. This may entail some non-trivial operations - /// to truncate or reconstruct illegal types since intrinsics must be legal - Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord); - - /// Perform a store-conditional operation to Addr. Return the status of the - /// store: 0 if the it succeeded, non-zero otherwise. - Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, - AtomicOrdering Ord); - - /// Return true if the given (atomic) instruction should be expanded by this - /// pass. 
- bool shouldExpandAtomic(Instruction *Inst); - }; -} - -char ARMAtomicExpandPass::ID = 0; - -FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) { - return new ARMAtomicExpandPass(TM); -} - -bool ARMAtomicExpandPass::runOnFunction(Function &F) { - SmallVector<Instruction *, 1> AtomicInsts; - - // Changing control-flow while iterating through it is a bad idea, so gather a - // list of all atomic instructions before we start. - for (BasicBlock &BB : F) - for (Instruction &Inst : BB) { - if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || - (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || - (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) - AtomicInsts.push_back(&Inst); - } - - bool MadeChange = false; - for (Instruction *Inst : AtomicInsts) { - if (!shouldExpandAtomic(Inst)) - continue; - - if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) - MadeChange |= expandAtomicRMW(AI); - else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) - MadeChange |= expandAtomicCmpXchg(CI); - else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - MadeChange |= expandAtomicLoad(LI); - else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - MadeChange |= expandAtomicStore(SI); - else - llvm_unreachable("Unknown atomic instruction"); - } - - return MadeChange; -} - -bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) { - // Load instructions don't actually need a leading fence, even in the - // SequentiallyConsistent case. - AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); - - // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is - // an ldrexd (A3.5.3). - IRBuilder<> Builder(LI); - Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder); - - insertTrailingFence(Builder, LI->getOrdering()); - - LI->replaceAllUsesWith(Val); - LI->eraseFromParent(); - - return true; -} - -bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. - IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); - SI->eraseFromParent(); - - // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); -} - -bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // fence? - // atomicrmw.start: - // %loaded = @load.linked(%addr) - // %new = some_op iN %loaded, %incr - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %atomicrmw.end - // atomicrmw.end: - // fence? - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. 
It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); - - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } - - Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - insertTrailingFence(Builder, Order); - - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); - AtomicOrdering FailureOrder = CI->getFailureOrdering(); - Value *Addr = CI->getPointerOperand(); - BasicBlock *BB = CI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord - // - // The full expansion we produce is: - // [...] - // fence? - // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.end/%cmpxchg.barrier - // cmpxchg.trystore: - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %cmpxchg.end - // cmpxchg.barrier: - // fence? - // br label %cmpxchg.end - // cmpxchg.end: - // [...] 
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); - - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - - // If the the cmpxchg doesn't actually need any ordering when it fails, we can - // jump straight past that fence instruction (if it exists). - BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB; - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); - - Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = - storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); - - // Finally, make sure later instructions don't get reordered with a fence if - // necessary. - Builder.SetInsertPoint(BarrierBB); - insertTrailingFence(Builder, SuccessOrder); - Builder.CreateBr(ExitBB); - - CI->replaceAllUsesWith(Loaded); - CI->eraseFromParent(); - - return true; -} - -Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); - bool IsAcquire = - Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; - - // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd - // intrinsic must return {i32, i32} and we have to recombine them into a - // single i64 here. - if (ValTy->getPrimitiveSizeInBits() == 64) { - Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); - - Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); - Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); - - Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); - Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); - Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); - Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); - return Builder.CreateOr( - Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); - } - - Type *Tys[] = { Addr->getType() }; - Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); - - return Builder.CreateTruncOrBitCast( - Builder.CreateCall(Ldrex, Addr), - cast<PointerType>(Addr->getType())->getElementType()); -} - -Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val, - Value *Addr, AtomicOrdering Ord) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = - Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; - - // Since the intrinsics must have legal type, the i64 intrinsics take two - // parameters: "i32, i32". We must marshal Val into the appropriate form - // before the call. - if (Val->getType()->getPrimitiveSizeInBits() == 64) { - Intrinsic::ID Int = - IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; - Function *Strex = Intrinsic::getDeclaration(M, Int); - Type *Int32Ty = Type::getInt32Ty(M->getContext()); - - Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); - Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); - Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); - return Builder.CreateCall3(Strex, Lo, Hi, Addr); - } - - Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; - Type *Tys[] = { Addr->getType() }; - Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); - - return Builder.CreateCall2( - Strex, Builder.CreateZExtOrBitCast( - Val, Strex->getFunctionType()->getParamType(0)), - Addr); -} - -AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) - return Ord; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - Builder.CreateFence(Release); - - // The exclusive operations don't need any barrier if we're adding separate - // fences. - return Monotonic; -} - -void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) - return; - - if (Ord == Acquire || Ord == AcquireRelease) - Builder.CreateFence(Acquire); - else if (Ord == SequentiallyConsistent) - Builder.CreateFence(SequentiallyConsistent); -} - -bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong: - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; - else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - return LI->getType()->getPrimitiveSizeInBits() == 64; - - // For the real atomic operations, we have ldrex/strex up to 64 bits. - return Inst->getType()->getPrimitiveSizeInBits() <= 64; -} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 47f5bf9..bc266e8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,11 +37,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-instrinfo" + #define GET_INSTRINFO_CTOR_DTOR #include "ARMGenInstrInfo.inc" -using namespace llvm; - static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); @@ -125,14 +127,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // FIXME: Thumb2 support. 
if (!EnableARM3Addr) - return NULL; + return nullptr; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; + default: return nullptr; case ARMII::IndexModePre: isPre = true; break; @@ -144,10 +146,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // operation. unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) - return NULL; + return nullptr; - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; + MachineInstr *UpdateMI = nullptr; + MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); @@ -169,7 +171,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ARM_AM::getSOImmVal(Amt) == -1) // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! - return NULL; + return nullptr; UpdateMI = BuildMI(MF, MI->getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) @@ -273,8 +275,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - TBB = 0; - FBB = 0; + TBB = nullptr; + FBB = nullptr; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -331,7 +333,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, I->isReturn())) { // Forget any previous condition branch information - it no longer applies. Cond.clear(); - FBB = 0; + FBB = nullptr; // If we can modify the function, delete everything below this // unconditional branch. @@ -405,7 +407,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "ARM branch conditions have two components!"); - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); @@ -535,7 +537,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } -template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) { +namespace llvm { +template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef() || MO.isUse()) @@ -548,6 +551,7 @@ template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) { // all definitions of CPSR are dead return true; } +} /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. LLVM_ATTRIBUTE_NOINLINE @@ -620,7 +624,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (MI->isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); + assert(MJTI != nullptr); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte @@ -1248,7 +1252,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); unsigned PCLabelId = AFI->createPICLabelUId(); - ARMConstantPoolValue *NewCPV = 0; + ARMConstantPoolValue *NewCPV = nullptr; // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and @@ -1659,10 +1663,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return NULL; + return nullptr; MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) - return NULL; + return nullptr; // After swapping the MOVCC operands, also invert the condition. MI->getOperand(MI->findFirstPredOperandIdx()) .setImm(ARMCC::getOppositeCondition(CC)); @@ -1678,35 +1682,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return 0; + return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) - return 0; + return nullptr; MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) - return 0; + return nullptr; // MI is folded into the MOVCC by predicating it. if (!MI->isPredicable()) - return 0; + return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); // Reject frame index operands, PEI can't handle the predicated pseudos. if (MO.isFI() || MO.isCPI() || MO.isJTI()) - return 0; + return nullptr; if (!MO.isReg()) continue; // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) - return 0; + return nullptr; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - return 0; + return nullptr; if (MO.isDef() && !MO.isDead()) - return 0; + return nullptr; } bool DontMoveAcrossStores = true; - if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) - return 0; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr, + DontMoveAcrossStores)) + return nullptr; return MI; } @@ -1741,14 +1746,14 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (!DefMI) DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); if (!DefMI) - return 0; + return nullptr; // Find new register class to use. MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); unsigned DestReg = MI->getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) - return 0; + return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. @@ -2254,7 +2259,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. 
if (CmpMask != ~0) { if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { - MI = 0; + MI = nullptr; for (MachineRegisterInfo::use_instr_iterator UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); UI != UE; ++UI) { @@ -2281,17 +2286,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) - MI = NULL; + MI = nullptr; else return false; } @@ -3295,7 +3300,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, if (Idx == -1) { Dist = 0; - return 0; + return nullptr; } UseIdx = Idx; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 3ddddcb..4b3e740 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -261,7 +261,7 @@ private: unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8130a2d..a2eee9f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -44,14 +44,18 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), - FramePtr((STI.isTargetMachO() || STI.isThumb()) ? ARM::R7 : ARM::R11), - BasePtr(ARM::R6) { + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { + if (STI.isTargetMachO()) + FramePtr = ARM::R7; + else if (STI.isTargetWindows()) + FramePtr = ARM::R11; + else // ARM EABI + FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; } -const uint16_t* +const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; @@ -107,7 +111,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // should return NULL if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls - return NULL; + return nullptr; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } @@ -173,7 +177,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return 0; // Can't copy CCR registers. + return nullptr; // Can't copy CCR registers. 
return RC; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 66b3c82..91df565 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -100,8 +100,8 @@ protected: public: /// Code Generation virtual methods... - const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; const uint32_t *getCallPreservedMask(CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; @@ -186,7 +186,7 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 4f94ad2..dc41c1c 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -28,7 +28,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -71,10 +71,10 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; - static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; - static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -123,8 +123,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) @@ -160,6 +160,105 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; + +// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA +// has InConsecutiveRegs set, and that the last member also has +// InConsecutiveRegsLast set. 
We must process all members of the HA before +// we can allocate it, as we need to know the total number of registers that +// will be needed in order to (attempt to) allocate a contiguous block. +static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type + assert(PendingHAMembers.size() < 8); + if (PendingHAMembers.size() > 0) + assert(PendingHAMembers[0].getLocVT() == LocVT); + + // Add the argument to the list to be allocated once we know the size of the + // HA + PendingHAMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + + if (ArgFlags.isInConsecutiveRegsLast()) { + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + "Homogeneous aggregates must have between 1 and 4 members"); + + // Try to allocate a contiguous block of registers, each of the correct + // size to hold one member. + const uint16_t *RegList; + unsigned NumRegs; + switch (LocVT.SimpleTy) { + case MVT::i32: + case MVT::f32: + RegList = SRegList; + NumRegs = 16; + break; + case MVT::f64: + RegList = DRegList; + NumRegs = 8; + break; + case MVT::v2f64: + RegList = QRegList; + NumRegs = 4; + break; + default: + llvm_unreachable("Unexpected member type for HA"); + break; + } + + unsigned RegResult = + State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); + + if (RegResult) { + for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin(); + It != PendingHAMembers.end(); ++It) { + It->convertToReg(RegResult); + State.addLoc(*It); + ++RegResult; + } + PendingHAMembers.clear(); + return true; + } + + // Register allocation failed, fall back to the stack + + // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) + for (unsigned regNo = 0; regNo < 16; ++regNo) + State.AllocateReg(SRegList[regNo]); + + unsigned Size = LocVT.getSizeInBits() / 8; + unsigned Align = Size; + + if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { + // Vectors are always aligned to 8 bytes. If we've seen an i32 here + // it's because it's been split from a larger type, also with align 8. + Align = 8; + } + + for (auto It : PendingHAMembers) { + It.convertToMem(State.AllocateStack(Size, Align)); + State.addLoc(It); + + // Only the first member needs to be aligned. 
+ Align = 1; + } + + // All pending members have now been allocated + PendingHAMembers.clear(); + } + + // This will be allocated by the last member of the HA + return true; +} + } // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7cffd82..526089b 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + // HFAs are passed in a contiguous block of registers, or on the stack + CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>, + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 7359a11..2fd7edd 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" @@ -40,6 +39,8 @@ #endif using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -65,10 +66,10 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(0), + : MachineFunctionPass(ID), JTI(nullptr), II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), + MCE(mce), MCPEs(nullptr), MJTEs(nullptr), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the @@ -373,7 +374,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = 0; + MJTEs = nullptr; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ba05171..ce264ee 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -36,6 +35,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "arm-cp-islands" + STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -593,7 +594,7 @@ ARMConstantIslands::CPEntry if (CPEs[i].CPEMI == CPEMI) return &CPEs[i]; } - return NULL; + return nullptr; } /// getCPELogAlign - Returns the required alignment of the constant pool entry @@ -1102,7 +1103,7 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { removeDeadCPEMI(CPEMI); - CPE->CPEMI = NULL; + CPE->CPEMI = nullptr; --NumCPEs; return true; } @@ -1135,7 +1136,7 @@ int 
ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) + if (CPEs[i].CPEMI == nullptr) continue; if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { @@ -1317,7 +1318,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, ++MI; unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; + MachineInstr *LastIT = nullptr; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) { @@ -1491,7 +1492,7 @@ bool ARMConstantIslands::removeUnusedCPEntries() { for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { removeDeadCPEMI(CPEs[j].CPEMI); - CPEs[j].CPEMI = NULL; + CPEs[j].CPEMI = nullptr; MadeChange = true; } } @@ -1844,7 +1845,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -1970,7 +1971,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -2012,7 +2013,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; SmallVector<MachineOperand, 4> CondPrior; MachineFunction::iterator BBi = BB; @@ -2032,7 +2033,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Update numbering to account for the block being moved. MF->RenumberBlocks(); ++NumJTMoved; - return NULL; + return nullptr; } // Create a new MBB for the code after the jump BB. 
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bd4ee44..6045738 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-pseudo" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" @@ -23,6 +22,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" @@ -31,6 +31,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-pseudo" + static cl::opt<bool> VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, cl::desc("Verify machine code after expanding ARM pseudos")); @@ -345,7 +347,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) return I; - return NULL; + return nullptr; } /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, @@ -614,6 +616,39 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } +static bool IsAnAddressOperand(const MachineOperand &MO) { + // This check is overly conservative. Unless we are certain that the machine + // operand is not a symbol reference, we return that it is a symbol reference. + // This is important as the load pair may not be split up Windows. + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: + return false; + case MachineOperand::MO_MachineBasicBlock: + return true; + case MachineOperand::MO_FrameIndex: + return false; + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + return true; + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return false; + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + return true; + case MachineOperand::MO_CFIIndex: + return false; + } + llvm_unreachable("unhandled machine operand type"); +} + void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -624,10 +659,14 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); MachineInstrBuilder LO16, HI16; if (!STI->hasV6T2Ops() && (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // FIXME Windows CE supports older ARM CPUs + assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); + // Expand into a movi + orr. 
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) @@ -664,17 +703,29 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); - if (MO.isImm()) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *ES = MO.getSymbolName(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); + HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); + break; + } + default: { const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + break; + } } LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -682,6 +733,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); + if (RequiresBundling) + finalizeBundle(MBB, &*LO16, &*MBBI); + TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index c442444..6f8fb1a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -166,8 +166,6 @@ class ARMFastISel final : public FastISel { // Utility routines. private: - unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum, - unsigned Op); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -191,6 +189,8 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); + const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + // Call handling routines. private: CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, @@ -283,23 +283,6 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { - const TargetRegisterClass *RegClass = - TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); - if (!MRI.constrainRegClass(Op, RegClass)) { - // If it's not legal to COPY between the register classes, something - // has gone very wrong before we got here. - unsigned NewOp = createResultReg(RegClass); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); - return NewOp; - } - } - return Op; -} - unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { @@ -769,7 +752,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Computes the address to get to an object. bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { // Some boilerplate from the X86 FastISel. 
- const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -1400,7 +1383,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, const APInt &CIVal = ConstInt->getValue(); Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather - // then a cmn, because there is no way to represent 2147483648 as a + // then a cmn, because there is no way to represent 2147483648 as a // signed 32-bit int. if (Imm < 0 && Imm != (int)0x80000000) { isNegativeImm = true; @@ -2182,7 +2165,8 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) { if (!LCREVT.isSimple()) return 0; GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, Name); + GlobalValue::ExternalLinkage, nullptr, + Name); assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2286,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { } bool ARMFastISel::SelectCall(const Instruction *I, - const char *IntrMemName = 0) { + const char *IntrMemName = nullptr) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); @@ -3092,6 +3076,6 @@ namespace llvm { TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); } - return 0; + return nullptr; } } diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index a30f4cd..e191a3c 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -1,4 +1,4 @@ -//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===// +//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,11 +16,11 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" +namespace llvm { + template<typename InstrType> // could be MachineInstr or MCInst bool IsCPSRDead(InstrType *Instr); -namespace llvm { - template<typename InstrType> // could be MachineInstr or MCInst inline bool isV8EligibleForIT(InstrType *Instr) { switch (Instr->getOpcode()) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 36ecfca..0caf4bf 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -87,7 +87,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const uint16_t *CSRegs) { + const MCPhysReg *CSRegs) { // Integer spill area is handled with "pop". if (isPopOpcode(MI->getOpcode())) { // The first two operands are predicates. 
The last two are @@ -142,6 +142,14 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { return count; } +static bool WindowsRequiresStackProbe(const MachineFunction &MF, + size_t StackSizeInBytes) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->getStackProtectorIndex() > 0) + return StackSizeInBytes >= 4080; + return StackSizeInBytes >= 4096; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -149,15 +157,16 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); + const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = TM.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); @@ -187,7 +196,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } - if (!AFI->hasStackFrame()) { + if (!AFI->hasStackFrame() && + (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); @@ -284,6 +294,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } else NumBytes = DPRCSOffset; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 2; + + if (NumWords < 65536) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup)); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + case CodeModel::Large: + case CodeModel::JITDefault: + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) + .addExternalSymbol("__chkstk") + .setMIFlags(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup))); + NumBytes = 0; + } + unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // 
Adjust SP after all the callee-save spills. @@ -316,10 +371,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator Pos = ++GPRCS1Push; BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -382,10 +436,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -411,7 +464,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { do { MachineBasicBlock::iterator Push = DPRCSPush++; if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push);; + CFAOffset -= sizeOfSPAdjustment(Push); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -419,10 +472,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); if ((Reg >= ARM::D0 && Reg <= ARM::D31) && (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); @@ -540,7 +592,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. 
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { do { --MBBI; @@ -1205,12 +1257,9 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII) { unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += TII.GetInstSizeInBytes(I); + for (auto &MBB : MF) { + for (auto &MI : MBB) + FnSize += TII.GetInstSizeInBytes(&MI); } return FnSize; } @@ -1223,21 +1272,21 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI) { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; // When using ADDri to get the address of a stack object, 255 is the // largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == ARM::ADDri) { + if (MI.getOpcode() == ARM::ADDri) { Limit = std::min(Limit, (1U << 8) - 1); break; } // Otherwise check the addressing mode. - switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); @@ -1374,7 +1423,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1486,6 +1535,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (hasFP(MF)) { MRI.setPhysRegUsed(FramePtr); + auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); NumGPRSpills++; } @@ -1681,7 +1734,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!ST->isTargetAndroid() && !ST->isTargetLinux()) - report_fatal_error("Segmented stacks not supported on this platfrom."); + report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock &prologueMBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1693,6 +1746,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; + uint64_t StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + // Use R4 and R5 as scratch registers. 
// We save R4 and R5 before use and restore them before leaving the function. unsigned ScratchReg0 = ARM::R4; @@ -1722,8 +1781,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(PrevStackMBB); // The required stack size that is aligned to ARM constant criterion. - uint64_t StackSize = MFI->getStackSize(); - AlignedStackSize = alignToARMConstant(StackSize); // When the frame size is less than 256 we just compare the stack diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 524ee36..981d320 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -57,7 +57,7 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const override; - void adjustForSegmentedStacks(MachineFunction &MF) const; + void adjustForSegmentedStacks(MachineFunction &MF) const override; private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 61d4e12..0885c4e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -77,7 +77,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } void ARMHazardRecognizer::Reset() { - LastMI = 0; + LastMI = nullptr; FpMLxStalls = 0; ScoreboardHazardRecognizer::Reset(); } @@ -95,7 +95,7 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { void ARMHazardRecognizer::AdvanceCycle() { if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. - LastMI = 0; + LastMI = nullptr; ScoreboardHazardRecognizer::AdvanceCycle(); } diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index e88cd0d..a8198e2 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -35,7 +35,7 @@ public: ARMHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), - LastMI(0) {} + LastMI(nullptr) {} HazardType getHazardType(SUnit *SU, int Stalls) override; void Reset() override; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 70e11c5..08d598d 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "arm-isel" + static cl::opt<bool> DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -72,6 +73,13 @@ public: Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. 
+ Subtarget = &TM.getSubtarget<ARMSubtarget>(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + const char *getPassName() const override { return "ARM Instruction Selection"; } @@ -397,7 +405,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, N1, CurDAG->getConstant(TZ, MVT::i32)); CurDAG->UpdateNodeOperands(N, N0, N1); - } + } } /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS @@ -1440,7 +1448,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); SDValue Offset, AMOpc; @@ -1506,14 +1514,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { } } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1540,7 +1548,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; break; default: - return NULL; + return nullptr; } Match = true; } @@ -1554,7 +1562,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { MVT::Other, Ops); } - return NULL; + return nullptr; } /// \brief Form a GPRPair pseudo register from a pair of GPR regs. @@ -1699,10 +1707,10 @@ static bool isVSTfixed(unsigned Opc) case ARM::VST1d16wb_fixed : return true; case ARM::VST1d32wb_fixed : return true; case ARM::VST1d64wb_fixed : return true; - case ARM::VST1q8wb_fixed : return true; - case ARM::VST1q16wb_fixed : return true; - case ARM::VST1q32wb_fixed : return true; - case ARM::VST1q64wb_fixed : return true; + case ARM::VST1q8wb_fixed : return true; + case ARM::VST1q16wb_fixed : return true; + case ARM::VST1q32wb_fixed : return true; + case ARM::VST1q64wb_fixed : return true; case ARM::VST1d64TPseudoWB_fixed : return true; case ARM::VST1d64QPseudoWB_fixed : return true; case ARM::VST2d8wb_fixed : return true; @@ -1776,7 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1895,7 +1903,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, @@ -1909,7 +1917,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2055,7 +2063,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2160,7 +2168,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, @@ -2171,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); @@ -2243,7 +2251,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, @@ -2282,7 +2290,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) - return NULL; + return nullptr; unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) @@ -2295,7 +2303,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // The immediate is a mask of the low bits iff imm & (imm+1) == 0 if (And_imm & (And_imm + 1)) - return NULL; + return nullptr; unsigned Srl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, @@ -2315,7 +2323,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } // ARM models shift instructions as MOVsi with shifter operand. @@ -2325,17 +2333,17 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); } SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), - getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } // Otherwise, we're looking for a shift of a shift @@ -2349,16 +2357,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) - return NULL; + return nullptr; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } /// Target-specific DAG combining for ISD::XOR. 
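The hunks above also touch SelectV6T2BitfieldExtractOp, which folds an AND of a right shift into a single SBFX/UBFX when the AND immediate is a contiguous low-bit mask (the (And_imm & (And_imm + 1)) check). As a rough host-side sketch of the arithmetic involved, not part of this patch and with illustrative helper names, the lsb operand is the shift amount and the width operand is the size of the mask:

  #include <cassert>
  #include <cstdint>

  // Given (x >> srl) & mask, where mask is a contiguous run of low bits
  // ((mask & (mask + 1)) == 0), a UBFX can extract the field directly:
  // lsb is the shift amount, width is the number of ones in the mask.
  struct Bitfield { unsigned lsb, width; };

  static bool matchUbfx(uint32_t mask, unsigned srl, Bitfield &out) {
    if (mask == 0 || (mask & (mask + 1)) != 0)  // not a low-bit mask
      return false;
    unsigned width = 0;
    for (uint32_t m = mask; m; m >>= 1)         // bit count of a contiguous mask
      ++width;
    if (srl + width > 32)                       // field must fit in the register
      return false;
    out = {srl, width};
    return true;
  }

  // Reference semantics of UBFX Rd, Rn, #lsb, #width.
  static uint32_t ubfx(uint32_t x, const Bitfield &bf) {
    uint32_t mask = (bf.width == 32) ? ~0u : ((1u << bf.width) - 1);
    return (x >> bf.lsb) & mask;
  }

  int main() {
    Bitfield bf;
    assert(matchUbfx(0xff, 8, bf) && bf.lsb == 8 && bf.width == 8);
    assert(ubfx(0x12345678, bf) == 0x56);
    return 0;
  }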
@@ -2377,10 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) - return NULL; + return nullptr; if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) - return NULL; + return nullptr; SDValue ADDSrc0 = XORSrc0.getOperand(0); SDValue ADDSrc1 = XORSrc0.getOperand(1); @@ -2391,13 +2399,13 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ unsigned Size = XType.getSizeInBits() - 1; if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != NULL && + XType.isInteger() && SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { @@ -2414,7 +2422,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } switch (N->getOpcode()) { @@ -2478,7 +2486,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); - return NULL; + return nullptr; } // Other cases are autogenerated. @@ -2492,14 +2500,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } case ISD::SRL: @@ -2526,10 +2534,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
@@ -2542,10 +2550,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); } } } @@ -2660,7 +2668,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } } case ISD::LOAD: { - SDNode *ResNode = 0; + SDNode *ResNode = nullptr; if (Subtarget->isThumb() && Subtarget->hasThumb2()) ResNode = SelectT2IndexedLoad(N); else @@ -2707,13 +2715,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); - return NULL; + return nullptr; } case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: @@ -2733,7 +2741,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: @@ -2753,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VTRNd8; break; case MVT::v4i16: Opc = ARM::VTRNd16; break; case MVT::v2f32: @@ -2834,7 +2842,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, ARM::VLD1q64wb_fixed }; - return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD2_UPD: { @@ -2845,7 +2853,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; - return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD3_UPD: { @@ -2912,7 +2920,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1q16wb_fixed, ARM::VST1q32wb_fixed, ARM::VST1q64wb_fixed }; - return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST2_UPD: { @@ -2923,7 +2931,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; - return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST3_UPD: { @@ -3047,7 +3055,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(SDValue(N, 1), Result); } ReplaceUses(SDValue(N, 2), OutChain); - return NULL; + return nullptr; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { @@ -3093,7 +3101,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1d32, ARM::VLD1d64 }; static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64}; - return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, 
false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld2: { @@ -3101,7 +3109,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2d32, ARM::VLD1q64 }; static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; - return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld3: { @@ -3164,7 +3172,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1d32, ARM::VST1d64 }; static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64 }; - return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst2: { @@ -3172,7 +3180,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2d32, ARM::VST1q64 }; static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; - return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst3: { @@ -3306,7 +3314,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); SmallVector<bool, 8> OpChanged; // Glue node will be appended late. @@ -3388,7 +3397,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // Update the original glue user. std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); - CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(GU, Ops); GU = T1.getNode(); } else { @@ -3435,11 +3444,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if (Glue.getNode()) AsmNodeOperands.push_back(Glue); if (!Changed) - return NULL; + return nullptr; SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], - AsmNodeOperands.size()); + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); New->setNodeId(-1); return New.getNode(); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2ebad8e..00d07e8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -37,18 +36,22 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include <utility> using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); @@ -79,7 +82,7 @@ namespace { } // The APCS parameter registers. 
-static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -155,7 +158,8 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { if (TM.getSubtarget<ARMSubtarget>().isTargetMachO()) return new TargetLoweringObjectFileMachO(); - + if (TM.getSubtarget<ARMSubtarget>().isTargetWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } @@ -170,7 +174,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -246,173 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && !Subtarget->isTargetWindows()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, 
"__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. 
- // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, 
ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
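The table-driven rewrite above preserves the previous behaviour: each RTABI helper keeps its name, its ARM_AAPCS calling convention, and, for the comparison helpers, the condition code passed to setCmpLibcallCC. A small stand-alone sketch of why the comparison entries carry a condition such as SETNE follows; the stub body is only a host-side stand-in for the runtime helper, which a real AEABI target provides.

  #include <cassert>

  // Stand-in for the RTABI 4.1.2 Table 3 helper __aeabi_dcmplt: it returns a
  // nonzero value when a < b and zero otherwise. On an AEABI target the
  // compiler runtime supplies this; the body here exists only so the sketch runs.
  static int aeabi_dcmplt(double a, double b) { return a < b ? 1 : 0; }

  // What the legalizer conceptually emits for a soft-float (a < b): call the
  // helper, then test its integer result with the condition registered through
  // setCmpLibcallCC -- SETNE for RTLIB::OLT_F64 in the table above.
  static bool lessThan(double a, double b) { return aeabi_dcmplt(a, b) != 0; }

  int main() {
    assert(lessThan(1.0, 2.0));
    assert(!lessThan(2.0, 1.0));
    return 0;
  }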
@@ -444,6 +409,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -631,6 +603,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -850,7 +827,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } - + // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); @@ -913,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // and extractions. std::pair<const TargetRegisterClass*, uint8_t> ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -950,7 +927,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; @@ -1204,40 +1181,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS); + return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1286,6 +1281,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1301,6 +1298,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1351,16 +1350,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 
0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1398,6 +1398,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1542,7 +1545,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1553,8 +1556,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -1741,10 +1743,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -2049,8 +2051,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2074,6 +2075,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector<SDValue, 4> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2100,12 +2102,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 
0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2117,12 +2122,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2151,8 +2159,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2314,13 +2321,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2466,6 +2473,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return Result; } +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + EVT PtrVT = getPointerTy(); + SDLoc DL(Op); + + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. 
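The LowerCallResult, PassF64ArgInRegs and LowerReturn hunks above all make the same adjustment: when an f64 travels through a pair of 32-bit GPRs (VMOVRRD/VMOVDRR), the low word sits in the first register only on little-endian targets, and the pair is swapped for big-endian. A minimal standalone sketch of that word-ordering rule, not taken from the patch, assuming a host where double is IEEE-754 binary64:

#include <cstdint>
#include <cstring>
#include <utility>

// Split a double into the two 32-bit words that would occupy a GPR pair.
// On big-endian targets the high word goes in the first register, so the
// pair is swapped -- mirroring the std::swap(Lo, Hi) calls in the hunks above.
std::pair<uint32_t, uint32_t> splitF64(double d, bool isLittleEndian) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));            // bit-exact copy of the f64
  uint32_t lo = static_cast<uint32_t>(bits);       // low 32 bits
  uint32_t hi = static_cast<uint32_t>(bits >> 32); // high 32 bits
  if (!isLittleEndian)
    std::swap(lo, hi);
  return {lo, hi};                                 // {first GPR, second GPR}
}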
+ return DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT)); +} + SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && @@ -2654,7 +2678,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2803,8 +2828,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } else { if (ArgSize == 0) { @@ -2834,8 +2858,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3166,11 +3191,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair<SDValue, SDValue> +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. 
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3472,7 +3582,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3512,11 +3622,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3713,7 +3823,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3729,7 +3839,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. 
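getARMXALUOOp and LowerXALUO above lower the SADDO/UADDO/SSUBO/USUBO nodes by computing the result and deriving the overflow bit from a CMP plus an ARM condition code (VC or HS). The condition-code mapping is target-specific, but the predicates being tested are the usual ones; a plain C++ illustration of those predicates, not the backend code itself:

#include <cstdint>

// Unsigned add overflows exactly when the truncated sum wraps below an operand.
bool uaddOverflow(uint32_t a, uint32_t b) { return a + b < a; }

// Unsigned subtract overflows (borrows) when the subtrahend is larger.
bool usubOverflow(uint32_t a, uint32_t b) { return a < b; }

// Signed add overflows when both operands share a sign and the result's sign
// differs; computed in unsigned arithmetic to avoid UB on wraparound.
bool saddOverflow(int32_t a, int32_t b) {
  uint32_t ua = static_cast<uint32_t>(a), ub = static_cast<uint32_t>(b);
  uint32_t s = ua + ub;
  return ((s ^ ua) & (s ^ ub)) >> 31;
}

// Signed subtract overflows when the operands' signs differ and the result's
// sign differs from the minuend's.
bool ssubOverflow(int32_t a, int32_t b) {
  uint32_t ua = static_cast<uint32_t>(a), ub = static_cast<uint32_t>(b);
  uint32_t d = ua - ub;
  return ((ua ^ ub) & (ua ^ d)) >> 31;
}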
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3761,14 +3871,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast<const ARMBaseRegisterInfo*>(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3777,6 +3889,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch<unsigned>(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3806,8 +3930,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3863,7 +3994,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3897,7 +4028,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4102,7 +4233,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? 
ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4859,7 +4990,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4870,7 +5001,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4906,7 +5037,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5213,12 +5344,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5371,7 +5500,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5608,7 +5737,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5946,12 +6075,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + &Args, 0) + .setDiscardResult(); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -5998,8 +6127,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6022,8 +6150,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6068,6 +6203,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -6558,7 +6698,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6755,8 +6895,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -6790,7 +6930,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? 
(const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7520,8 +7660,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } @@ -7581,7 +7720,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -7616,9 +7755,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -7635,7 +7774,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -7652,8 +7791,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8290,8 +8428,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -8302,16 +8439,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -8387,7 +8526,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -8470,7 +8609,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. 
V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -8666,7 +8805,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2)); SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -8676,8 +8815,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -8746,11 +8884,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9348,7 +9486,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -9935,11 +10073,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -9955,11 +10093,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; @@ -10053,7 +10191,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
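The computeMaskedBits* to computeKnownBits* rename above keeps the CMOV logic unchanged: a conditional move can only guarantee the bits that are known to the same value on both arms, so the known-zero and known-one masks are intersected. A small standalone model of that rule; KnownBits here is a local struct for illustration, not LLVM's class:

#include <cstdint>

// Per-value bit knowledge: a bit may be known zero, known one, or unknown
// (clear in both masks). Zero and One never overlap for a well-formed value.
struct KnownBits {
  uint32_t Zero = 0;
  uint32_t One = 0;
};

// For a select/CMOV, a bit is known only if it is known identically on both
// incoming operands, i.e. intersect the masks.
KnownBits knownBitsForCMOV(const KnownBits &LHS, const KnownBits &RHS) {
  KnownBits Out;
  Out.Zero = LHS.Zero & RHS.Zero;
  Out.One  = LHS.One  & RHS.One;
  return Out;
}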
@@ -10132,7 +10270,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; @@ -10331,13 +10469,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); return CallInfo.first; } @@ -10494,3 +10631,160 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return false; return true; } + +bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong: + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + return LI->getType()->getPrimitiveSizeInBits() == 64; + + // For the real atomic operations, we have ldrex/strex up to 64 bits. + return Inst->getType()->getPrimitiveSizeInBits() <= 64; +} + +Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast<PointerType>(Addr->getType())->getElementType()); +} + +Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast<StructType>(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. 
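isHomogeneousAggregate above walks a type recursively: nested structs and arrays contribute their flattened members, every member must share one base type (float, double, or a 64/128-bit vector), and the total count must be 1 to 4 for the aggregate to qualify for AAPCS-VFP registers. A toy standalone version of the same classification over a simplified type description follows; the names and the cut-down Ty struct are illustrative assumptions, not the LLVM type system, and the vector cases are omitted:

#include <cstdint>
#include <vector>

enum class BaseTy { Unknown, Float, Double };

// Simplified stand-in for the type shapes the classification cares about.
struct Ty {
  bool isFloat = false, isDouble = false;
  std::vector<Ty> structElems;   // non-empty => struct
  const Ty *arrayElem = nullptr; // non-null  => array
  uint64_t arrayLen = 0;
};

static bool classify(const Ty &T, BaseTy &Base, uint64_t &Members) {
  if (!T.structElems.empty()) {
    for (const Ty &E : T.structElems) {
      uint64_t Sub = 0;
      if (!classify(E, Base, Sub))
        return false;
      Members += Sub;            // flatten nested struct members
    }
  } else if (T.arrayElem) {
    uint64_t Sub = 0;
    if (!classify(*T.arrayElem, Base, Sub))
      return false;
    Members += Sub * T.arrayLen; // each array element counts separately
  } else if (T.isFloat) {
    if (Base != BaseTy::Unknown && Base != BaseTy::Float)
      return false;              // mixed base types disqualify
    Members = 1;
    Base = BaseTy::Float;
  } else if (T.isDouble) {
    if (Base != BaseTy::Unknown && Base != BaseTy::Double)
      return false;
    Members = 1;
    Base = BaseTy::Double;
  } else {
    return false;                // anything else is not a VFP base type here
  }
  return true;
}

// Homogeneous aggregate: uniform base type and between 1 and 4 members.
bool isHA(const Ty &T) {
  BaseTy Base = BaseTy::Unknown;
  uint64_t Members = 0;
  return classify(T, Base, Members) && Members >= 1 && Members <= 4;
}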
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index f33e6db..c15305c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -313,10 +313,10 @@ namespace llvm { SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const override; + void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const override; bool ExpandInlineAsm(CallInst *CI) const override; @@ -384,6 +384,18 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. + bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + + bool shouldExpandAtomicInIR(Instruction *Inst) const override; + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(MVT VT) const override; @@ -404,6 +416,7 @@ namespace llvm { void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); + std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, @@ -417,6 +430,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, SDLoc dl) const; + CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, @@ -430,6 +445,7 @@ namespace llvm { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const; @@ -438,6 +454,7 @@ namespace llvm { TLSModel::Model model) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; @@ -454,6 +471,8 @@ namespace llvm { SDValue 
LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + unsigned getRegisterByName(const char* RegName, EVT VT) const override; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is @@ -567,7 +586,6 @@ namespace llvm { OtherModImm }; - namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index aafff98..59e9260 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -2029,7 +2029,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // Same as N2V but not predicated. class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, dag oops, dag iops, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern> + string Dt, list<dag> pattern> : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vm", "", pattern> { bits<5> Vd; @@ -2138,8 +2138,7 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops,Format f, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable, list<dag> pattern> + string OpcodeStr, string Dt, list<dag> pattern> : NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", pattern> { bits<5> Vd; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 75a109e..718d5da 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -991,6 +991,81 @@ def addrmode6oneL32 : Operand<i32>, let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; } +// Base class for addrmode6 with specific alignment restrictions. +class AddrMode6Align : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); + let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD/VST instructions and checking the alignment is not specified. +def AddrMode6AlignNoneAsmOperand : AsmOperandClass { + let Name = "AlignedMemoryNone"; + let DiagnosticType = "AlignedMemoryRequiresNone"; +} +def addrmode6alignNone : AddrMode6Align { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6AlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align16AsmOperand : AsmOperandClass { + let Name = "AlignedMemory16"; + let DiagnosticType = "AlignedMemoryRequires16"; +} +def addrmode6align16 : AddrMode6Align { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6Align16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. 
+def AddrMode6Align32AsmOperand : AsmOperandClass { + let Name = "AlignedMemory32"; + let DiagnosticType = "AlignedMemoryRequires32"; +} +def addrmode6align32 : AddrMode6Align { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6Align32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align64AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64"; + let DiagnosticType = "AlignedMemoryRequires64"; +} +def addrmode6align64 : AddrMode6Align { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6Align64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128"; + let DiagnosticType = "AlignedMemoryRequires64or128"; +} +def addrmode6align64or128 : AddrMode6Align { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6Align64or128AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment +// encoding for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128or256"; + let DiagnosticType = "AlignedMemoryRequires64or128or256"; +} +def addrmode6align64or128or256 : AddrMode6Align { + // The alignment specifier can only be 64, 128, 256 or omitted. + let ParserMatchClass = AddrMode6Align64or128or256AsmOperand; +} + // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. def addrmode6dup : Operand<i32>, @@ -1003,6 +1078,69 @@ def addrmode6dup : Operand<i32>, let ParserMatchClass = AddrMode6AsmOperand; } +// Base class for addrmode6dup with specific alignment restrictions. +class AddrMode6DupAlign : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD-dup instruction and checking the alignment is not specified. +def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { + let Name = "DupAlignedMemoryNone"; + let DiagnosticType = "DupAlignedMemoryRequiresNone"; +} +def addrmode6dupalignNone : AddrMode6DupAlign { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign16AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory16"; + let DiagnosticType = "DupAlignedMemoryRequires16"; +} +def addrmode6dupalign16 : AddrMode6DupAlign { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6dupAlign16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign32AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory32"; + let DiagnosticType = "DupAlignedMemoryRequires32"; +} +def addrmode6dupalign32 : AddrMode6DupAlign { + // The alignment specifier can only be 32 or omitted. 
+ let ParserMatchClass = AddrMode6dupAlign32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for VLD +// instructions and checking the alignment value. +def AddrMode6dupAlign64AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64"; + let DiagnosticType = "DupAlignedMemoryRequires64"; +} +def addrmode6dupalign64 : AddrMode6DupAlign { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6dupAlign64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD instructions and checking the alignment value. +def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64or128"; + let DiagnosticType = "DupAlignedMemoryRequires64or128"; +} +def addrmode6dupalign64or128 : AddrMode6DupAlign { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand; +} + // addrmodepc := pc + reg // def addrmodepc : Operand<i32>, @@ -1689,7 +1827,8 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, } def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, - "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>, + Requires<[IsARM, HasV6]> { bits<8> imm; let Inst{27-8} = 0b00110010000011110000; let Inst{7-0} = imm; @@ -1702,8 +1841,6 @@ def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; -def : Pat<(int_arm_sevl), (HINT 5)>; - def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { bits<4> Rd; @@ -1830,6 +1967,18 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } +// A8.8.247 UDF - Undefined (Encoding A1) +def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, + "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-28} = 0b1110; // AL + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{19-8} = imm16{15-4}; + let Inst{7-4} = 0b1111; + let Inst{3-0} = imm16{3-0}; +} + /* * A5.4 Permanently UNDEFINED instructions. 
* @@ -2282,12 +2431,6 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, Requires<[IsARM, HasV5TE]>; - - // GNU Assembler extension (compatibility) - let isAsmParserOnly = 1 in - def LDRD_PAIR : AI3ld<0b1101, 0, (outs GPRPairOp:$Rt), (ins addrmode3:$addr), - LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $addr", []>, - Requires<[IsARM, HasV5TE]>; } def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -2557,14 +2700,6 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { Requires<[IsARM, HasV5TE]> { let Inst{21} = 0; } - - // GNU Assembler extension (compatibility) - let isAsmParserOnly = 1 in - def STRD_PAIR : AI3str<0b1111, (outs), (ins GPRPairOp:$Rt, addrmode3:$addr), - StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $addr", []>, - Requires<[IsARM, HasV5TE]> { - let Inst{21} = 0; - } } // Indexed stores @@ -3999,6 +4134,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), Requires<[IsARM, HasV6]>, Sched<[WriteALU]>; +def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), + (REV16 (LDRH addrmode3:$addr))>; +def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", @@ -4816,7 +4956,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>, Requires<[PreV8]>; -def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, @@ -4824,7 +4964,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>, Requires<[PreV8]>; -def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 0d46c49..b32b5d2 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,49 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } + +def nImmVMOVI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16vmovByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMOVI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32vmovByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMVNI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16invByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} +def nImmVMVNI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32invByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} + +def 
nImmVMOVI16ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate; +} +def nImmVMOVI32ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate; +} +def nImmVMVNI16ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate; +} +def nImmVMVNI32ByteReplicate : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate; +} + def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -617,37 +660,37 @@ class VLDQQQQWBPseudo<InstrItinClass itin> let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string Dt> +class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1, + (ins AddrMode:$Rn), IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VLD1Q<bits<4> op7_4, string Dt> +class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x2, + (ins AddrMode:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VLD1d8 : VLD1D<{0,0,0,?}, "8">; -def VLD1d16 : VLD1D<{0,1,0,?}, "16">; -def VLD1d32 : VLD1D<{1,0,0,?}, "32">; -def VLD1d64 : VLD1D<{1,1,0,?}, "64">; +def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; -def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; -def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; -def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; -def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; +def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VLD1DWB<bits<4> op7_4, string Dt> { +multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1u, + (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -655,16 +698,16 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VLD1QWB<bits<4> op7_4, string Dt> { +multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -672,7 +715,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -680,27 +723,27 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { } } -defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; -defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; -defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; -defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; -defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; -defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; -defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; -defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VLD1D3<bits<4> op7_4, string Dt> +class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D3WB<bits<4> op7_4, string Dt> { +multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -708,7 +751,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -716,32 +759,32 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { } } -def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; -def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; -def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; -def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; +def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; -defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; -defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; -defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>; def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>; // ...with 4 registers -class VLD1D4<bits<4> op7_4, string Dt> +class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D4WB<bits<4> op7_4, string Dt> { +multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -749,7 +792,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -757,15 +800,15 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { } } -def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; -def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; -def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; -def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; -defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; -defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; -defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>; @@ -773,22 +816,28 @@ def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> + InstrItinClass itin, Operand AddrMode> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, + addrmode6align64or128>; -def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; -def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; -def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; @@ -796,9 +845,9 @@ def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy, InstrItinClass itin> { + RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will 
assign to the right encoding bits. @@ -806,7 +855,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), itin, + (ins AddrMode:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -814,13 +863,19 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; -defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; -defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; -defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; @@ -830,12 +885,18 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1293,47 +1354,55 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) -class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), - (ins addrmode6dup:$Rn), + (ins AddrMode:$Rn), 
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; -def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; -def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; +def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, + addrmode6dupalign32>; def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD1dup, + (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListDPairAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, + addrmode6dupalign32>; def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32 addrmode6:$addr)>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: -multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { +multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1342,17 +1411,17 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } } -multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1361,7 +1430,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1369,38 +1438,47 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { } } -defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; -defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; -defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; -defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; -defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; -defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> +class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD2dup, + (ins AddrMode:$Rn), IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; +// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or +// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". // ...with double-spaced registers -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // ...with address register writeback: -multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { +multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy, + Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD2dupu, + (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
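For the single-element-to-all-lanes (dup) loads the operand is chosen per element size, hence the addrmode6dupalign16/32/64 variants above. Illustrative examples, again inferred from the operand names rather than stated in the patch:

  vld1.16 {d0[]}, [r0:16]         @ vld1 to all lanes, 16-bit elements: :16 hint
  vld2.32 {d0[], d1[]}, [r1:64]   @ vld2 to all lanes, 32-bit elements: :64 hint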
@@ -1409,7 +1487,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1417,13 +1495,19 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> @@ -1449,22 +1533,22 @@ def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD3DUPWB<bits<4> op7_4, string Dt> +class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, + (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } -def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; -def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; -def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; +def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; +def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; +def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; -def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -1560,35 +1644,35 @@ class VSTQQQQWBPseudo<InstrItinClass itin> "$addr.addr = $wb">; // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), +class VST1D<bits<4> op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VST1Q<bits<4> op7_4, string Dt> - : 
NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), +class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VST1d8 : VST1D<{0,0,0,?}, "8">; -def VST1d16 : VST1D<{0,1,0,?}, "16">; -def VST1d32 : VST1D<{1,0,0,?}, "32">; -def VST1d64 : VST1D<{1,1,0,?}, "64">; +def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; -def VST1q8 : VST1Q<{0,0,?,?}, "8">; -def VST1q16 : VST1Q<{0,1,?,?}, "16">; -def VST1q32 : VST1Q<{1,0,?,?}, "32">; -def VST1q64 : VST1Q<{1,1,?,?}, "64">; +def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VST1DWB<bits<4> op7_4, string Dt> { +multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1596,7 +1680,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1604,9 +1688,9 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VST1QWB<bits<4> op7_4, string Dt> { +multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, + (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1614,7 +1698,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1623,28 +1707,28 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { } } -defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; -defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; -defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; -defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; -defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; -defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; -defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; -defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VST1D3<bits<4> op7_4, string Dt> +class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, VecListThreeD:$Vd), + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D3WB<bits<4> op7_4, string Dt> { +multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1652,7 +1736,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1661,33 +1745,33 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; -defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; -defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; -defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>; def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers -class VST1D4<bits<4> op7_4, string Dt> +class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, VecListFourD:$Vd), + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D4WB<bits<4> op7_4, string Dt> { +multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1695,7 +1779,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1704,15 +1788,15 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; -defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; -defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; -defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>; @@ -1720,21 +1804,27 @@ def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + InstrItinClass itin, Operand AddrMode> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, + addrmode6align64or128>; -def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; -def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; -def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; @@ -1742,9 +1832,9 @@ def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy> { + RegisterOperand VdTy, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt 
will assign to the right encoding bits. @@ -1752,16 +1842,16 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } } -multiclass VST2QWB<bits<4> op7_4, string Dt> { +multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1769,7 +1859,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1778,13 +1868,16 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, + addrmode6align64or128>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, + addrmode6align64or128>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, + addrmode6align64or128>; -defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; -defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; -defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; @@ -1794,12 +1887,18 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, + addrmode6align64or128>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ 
-2267,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), (VST1q64 addrmode6:$addr, QPR:$value)>; def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), - (VLD1q32 addrmode6:$addr)>; + (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q32 addrmode6:$addr, QPR:$value)>; + (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), @@ -2357,14 +2456,14 @@ class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Similar to NV2VQIntnp with some more encoding bits exposed (crypto). @@ -2372,7 +2471,7 @@ class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Same as N2VQIntXnp but with Vd as a src register. @@ -2381,7 +2480,7 @@ class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { let Constraints = "$src = $Vd"; } @@ -2555,7 +2654,6 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, @@ -2609,7 +2707,6 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; // Same as N3VQIntnp but with Vd as a src register. 
@@ -2618,8 +2715,8 @@ class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, - (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, - Dt, ResTy, OpTy, IntOp, Commutable, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), + f, itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { let Constraints = "$src = $Vd"; @@ -2939,7 +3036,6 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp<op27_23, op21_20, op11_8, op6, op4, (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, - ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, @@ -5245,6 +5341,35 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable +// Add support for bytes replication feature, so it could be GAS compatible. +// E.g. instructions below: +// "vmov.i32 d0, 0xffffffff" +// "vmov.i32 d0, 0xabababab" +// "vmov.i16 d0, 0xabab" +// are incorrect, but we could deal with such cases. +// For last two instructions, for example, it should emit: +// "vmov.i8 d0, 0xab" +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; + +// Also add same support for VMVN instructions. 
So instruction: +// "vmvn.i32 d0, 0xabababab" +// actually means: +// "vmov.i8 d0, 0x54" +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" // require zero cycles to execute so they should be used wherever possible for @@ -5617,22 +5742,22 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } -def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; @@ -6051,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)), //===----------------------------------------------------------------------===// // bit_convert -def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +} def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +} def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 
DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +} def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +} def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +} -def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +} def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v4i32 (bitconvert 
(v2i64 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +} def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +} + +let Predicates = [IsBE] in { + // 64 bit conversions + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 
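Under the IsBE predicate these reinterpret-casts can no longer be folded away, so each bitconvert is lowered to the VREV form that reverses element order within the wider element type. A sketch of what two of the patterns above correspond to in assembly:

  vrev64.32 d0, d0    @ v2i32 reinterpreted as v1i64: reverse the two 32-bit words
  vrev32.16 d0, d0    @ v4i16 reinterpreted as v2i32: reverse the 16-bit halves of each word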
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + + // 128 bit conversions + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; +} // Fold extracting an element out of a v2i32 into a vfp register. 
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), @@ -6120,7 +6323,7 @@ def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "8"> = +// Lengthen_Single<"8", "i16", "8"> = // Pat<(v8i16 (extloadvi8 addrmode6:$addr)) // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, // (f64 (IMPLICIT_DEF)), (i32 0)))>; @@ -6147,7 +6350,7 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { // half the lanes available. Example: // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, // (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)>; multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, @@ -6257,7 +6460,7 @@ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 @@ -6311,379 +6514,442 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, 
pred:$p)>; def VLD1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. 
These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD3 all-lanes 
pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 
rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins 
VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, 
pred:$p)>; def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; @@ -6691,168 +6957,190 @@ def VST3LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, 
$addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD4 all-lanes pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins 
VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; // VLD4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def 
VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; @@ -6860,168 +7148,202 @@ def VLD4LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_register_Asm_8 : 
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VST4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; // VST4 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, 
addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VMOV/VMVN takes an optional datatype suffix diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 754295f..e17f73a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -269,7 +269,8 @@ class T1SystemEncoding<bits<8> opc> let Inst{7-0} = opc; } -def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>, +def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", + [(int_arm_hint imm0_15:$imm)]>, T1SystemEncoding<0x00>, Requires<[IsThumb, HasV6M]> { bits<4> imm; @@ -288,7 +289,6 @@ def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157 def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def : T2Pat<(int_arm_sevl), (tHINT 5)>; // The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
@@ -1193,6 +1193,15 @@ def tTST : // A8.6.230 [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, Sched<[WriteALU]>; +// A8.8.247 UDF - Undefined (Encoding T1) +def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", + [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 { + bits<8> imm8; + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; + let Inst{7-0} = imm8; +} + // Zero-extend byte def tUXTB : // A8.6.262 T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1308,6 +1317,18 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), (tSUBrr tGPR:$lhs, tGPR:$rhs)>; +// Bswap 16 with load/store +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)), + (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>; +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), + (tREV16 (tLDRHi t_addrmode_is2:$addr))>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_rrs2:$addr), + (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_is2:$addr), + (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; + // ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 387bd60..c30d6ab 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1445,7 +1445,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, // Store doubleword let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores @@ -1676,7 +1676,7 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; // pci variant is very similar to i12, but supports negative offsets // from the PC. 
Only PLD and PLI have pci variants (not PLDW) class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr), - IIC_Preload, opc, "\t$addr", + IIC_Preload, opc, "\t$addr", [(ARMPreload (ARMWrapper tconstpool:$addr), (i32 0), (i32 inst))]>, Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; @@ -1918,7 +1918,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, let DecoderMethod = "DecodeT2MOVTWInstruction"; } -def : t2InstAlias<"mov${p} $Rd, $imm", +def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -2407,6 +2407,19 @@ def t2UBFX: T2TwoRegBitFI< let Inst{15} = 0; } +// A8.8.247 UDF - Undefined (Encoding T2) +def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16", + [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-29} = 0b111; + let Inst{28-27} = 0b10; + let Inst{26-20} = 0b1111111; + let Inst{19-16} = imm16{15-12}; + let Inst{15} = 0b1; + let Inst{14-12} = 0b010; + let Inst{11-0} = imm16{11-0}; +} + // A8.6.18 BFI - Bitfield insert (Encoding T1) let Constraints = "$src = $Rd" in { def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd), @@ -3495,8 +3508,8 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; - let AsmMatchConverter = "cvtThumbBranches"; -} + let AsmMatchConverter = "cvtThumbBranches"; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), @@ -3671,7 +3684,8 @@ def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> { +def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", + [(int_arm_hint imm0_239:$imm)]> { bits<8> imm; let Inst{31-3} = 0b11110011101011111000000000000; let Inst{7-0} = imm; @@ -3698,7 +3712,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. // Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; @@ -4278,7 +4292,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. 
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 73c6eb7..8821c2d 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" @@ -25,6 +24,8 @@ #include <cstdlib> using namespace llvm; +#define DEBUG_TYPE "jit" + void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } @@ -319,13 +320,13 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, break; } case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; + ResultPtr = ResultPtr & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; } case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; + ResultPtr = (ResultPtr >> 16) & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 48e0bd7..ee7df54 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb1RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -42,6 +43,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-ldst-opt" + STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); @@ -65,9 +68,10 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; - bool isThumb2; + bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -93,7 +97,10 @@ namespace { void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - + void UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -119,7 +126,6 @@ namespace { ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool 
FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ -159,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + // There is no non-writeback tSTMIA either. + ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -217,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -263,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace ARM_AM } // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi; +} + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi; } static bool isT2i32Store(unsigned Opc) { @@ -276,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); +} + +static unsigned getImmScale(unsigned Opc) { + switch (Opc) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::tLDRi: + case ARM::tSTRi: + return 1; + case ARM::tLDRHi: + case ARM::tSTRHi: + return 2; + case ARM::tLDRBi: + case ARM::tSTRBi: + return 4; + } +} + +/// Update future uses of the base register with the offset introduced +/// due to writeback. This function only works on Thumb1. +void +ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg) { + assert(isThumb1 && "Can only update base register uses for Thumb1!"); + + // Start updating any instructions with immediate offsets. Insert a sub before + // the first non-updateable instruction (if any). + for (; MBBI != MBB.end(); ++MBBI) { + if (MBBI->readsRegister(Base)) { + unsigned Opc = MBBI->getOpcode(); + int Offset; + bool InsertSub = false; + + if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || + Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || + Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + // Loads and stores with immediate offsets can be updated, but only if + // the new offset isn't negative. + // The MachineOperand containing the offset immediate is the last one + // before predicates. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + // The offsets are scaled by 1, 2 or 4 depending on the Opcode + Offset = MO.getImm() - WordOffset * getImmScale(Opc); + if (Offset >= 0) + MO.setImm(Offset); + else + InsertSub = true; + + } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { + // SUB/ADD using this register. Merge it with the update. 
+ // If the merged offset is too large, insert a new sub instead. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + Offset = (Opc == ARM::tSUBi8) ? + MO.getImm() + WordOffset * 4 : + MO.getImm() - WordOffset * 4 ; + if (TL->isLegalAddImmediate(Offset)) { + MO.setImm(Offset); + // The base register has now been reset, so exit early. + return; + } else { + InsertSub = true; + } + + } else { + // Can't update the instruction. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; + } + } + + if (MBBI->killsRegister(Base)) + // Register got killed. Stop updating. + return; + } + + // The end of the block was reached. This means register liveness escapes the + // block, and it's necessary to insert a sub before the last instruction. + if (MBB.succ_size() > 0) + // But only insert the SUB if there is actually a successor block. + // FIXME: Check more carefully if register is live at this point, e.g. by + // also examining the successor block's register liveness information. + AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -296,18 +423,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; - if (Offset == 4 && haveIBAndDA) + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; - else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) + } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - else if (Offset == -4 * (int)NumRegs && isNotVFP) + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - else if (Offset != 0) { - // Check if this is a supported opcode before we insert instructions to + } else if (Offset != 0) { + // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -318,41 +446,98 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; unsigned NewBase; - if (isi32Load(Opcode)) + if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; - else { + } else { // Use the scratch register to use as a new base. NewBase = Scratch; if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; Offset = - Offset; } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? 
- return false; // Probably not worth it then. - - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + if (Base != NewBase) { + // Need to insert a MOV to the new base first. + // FIXME: If the immediate fits in 3 bits, use ADD instead. + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + } + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) + .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); + } + Base = NewBase; - BaseKill = true; // New base is always killed right its use. + BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. Check for this. + if (Opcode == ARM::tLDRi && isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + Writeback = false; + break; + } + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + // The base isn't dead after a merged instruction with writeback. Update + // future uses of the base with the added offset (if possible), or reset + // the base register as necessary. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -492,7 +677,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // affected uses. 
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), E = UsesOfImpDefs.end(); - I != E; ++I) + I != E; ++I) (*I)->setIsUndef(); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { @@ -589,7 +774,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - return; } static bool definesCPSR(MachineInstr *MI) { @@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; @@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; @@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); @@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. 
+ if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); @@ -1002,7 +1202,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; if (isAM5) { - // VLDM[SD}_UPD, VSTM[SD]_UPD + // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) @@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) { Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; if (isAM3) { @@ -1408,16 +1614,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (MBBI == E) // Reach the end of the block, try merging the memory instructions. TryMerge = true; - } else + } else { TryMerge = true; + } if (TryMerge) { if (NumMemOps > 1) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. RS->forward(std::prev(MBBI)); @@ -1483,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. + if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1513,12 +1725,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); + TL = TM.getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; @@ -1666,11 +1880,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDRi12) + if (Opcode == ARM::LDRi12) { NewOpc = ARM::LDRD; - else if (Opcode == ARM::STRi12) + } else if (Opcode == ARM::STRi12) { NewOpc = ARM::STRD; - else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { + } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; Scale = 4; isT2 = true; @@ -1678,8 +1892,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, NewOpc = ARM::t2STRDi8; Scale = 4; isT2 = true; - } else + } else { return false; + } // Make sure the base address satisfies i64 ld / st alignment requirement. 
// At the moment, we ignore the memoryoperand's value. @@ -1746,8 +1961,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, while (Ops.size() > 1) { unsigned FirstLoc = ~0U; unsigned LastLoc = 0; - MachineInstr *FirstOp = 0; - MachineInstr *LastOp = 0; + MachineInstr *FirstOp = nullptr; + MachineInstr *LastOp = nullptr; int LastOffset = 0; unsigned LastOpcode = 0; unsigned LastBytes = 0; diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 20619fa..2a49255 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -8,8 +8,6 @@ // //===------------------------------------------------------------------------------------------===// -#define DEBUG_TYPE "double barriers" - #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" @@ -17,6 +15,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; +#define DEBUG_TYPE "double barriers" + STATISTIC(NumDMBsRemoved, "Number of DMBs removed"); namespace { @@ -25,9 +25,9 @@ public: static char ID; ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "optimise barriers pass"; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 7f0fe05..b290e7f 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -116,13 +116,13 @@ def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; -def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; @@ -158,11 +158,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be // reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV -// models the APSR when it's accessed by some special instructions. In such cases +// models the APSR when it's accessed by some special instructions. In such cases // it has the same encoding as PC. 
def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; -def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; def SPSR : ARMReg<2, "spsr">; def FPSCR : ARMReg<3, "fpscr">; def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0ace9bc..57d0bfb 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -93,7 +93,7 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, - + // Integer load pipeline // // Immediate offset @@ -181,7 +181,7 @@ def ARMV6Itineraries : ProcessorItineraries< // // Store multiple + update InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index ba3cf4d..008ad64 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" using namespace llvm; +#define DEBUG_TYPE "arm-selectiondag-info" + ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) : TargetSelectionDAGInfo(TM), Subtarget(&TM.getSubtarget<ARMSubtarget>()) { @@ -52,9 +53,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; + // Emit a maximum of 4 loads in Thumb1 since we have fewer registers + const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 
4 : 6; + SDValue TFOps[6]; + SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the @@ -71,7 +73,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { @@ -82,7 +85,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, isVolatile, false, 0); DstOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); EmittedNumMemOps += i; } @@ -112,7 +116,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SrcOff += VTSize; BytesLeft -= VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); i = 0; BytesLeft = BytesLeftSave; @@ -133,7 +138,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, DstOff += VTSize; BytesLeft -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); } // Adjust parameters for memset, EABI uses format (ptr, size, value), @@ -146,7 +152,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Use default for non-AAPCS (or MachO) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO()) + if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() || + Subtarget->isTargetWindows()) return SDValue(); const ARMTargetLowering &TLI = @@ -179,22 +186,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, Args.push_back(Entry); // Emit __eabi_memset call - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), // return type - false, // return sign ext - false, // return zero ext - false, // is var arg - false, // is in regs - 0, // number of fixed arguments - TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv - false, // is tail call - false, // does not return - false, // is return val used - DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), // callee - Args, DAG, dl); - std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(CLI); - + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), &Args, 0) + .setDiscardResult(); + + std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 73e2018..5b204f6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -21,12 +21,14 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "ARMGenSubtargetInfo.inc" -using namespace llvm; - static cl::opt<bool> ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve 
R9, making it unavailable as GPR")); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3855419..38536b2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -31,7 +31,7 @@ class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, + Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, CortexR5, Swift, CortexA53, CortexA57, Krait }; enum ARMProcClassEnum { @@ -242,9 +242,7 @@ protected: /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { - // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1. - // Change this once Thumb1 ldmia / stmia support is added. - return isThumb1Only() ? 0 : 64; + return 64; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -396,7 +394,7 @@ public: bool isLittle() const { return IsLittle; } unsigned getMispredictionPenalty() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4ae539a..8876227 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -228,7 +228,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { const ARMSubtarget *Subtarget = &getARMSubtarget(); if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) - addPass(createARMAtomicExpandPass(TM)); + addPass(createAtomicExpandLoadLinkedPass(TM)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); @@ -247,8 +247,7 @@ bool ARMPassConfig::addInstSelector() { } bool ARMPassConfig::addPreRegAlloc() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None) addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) addPass(createMLxExpansionPass()); @@ -262,12 +261,10 @@ bool ARMPassConfig::addPreRegAlloc() { } bool ARMPassConfig::addPreSched2() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) { - addPass(createARMLoadStoreOptimizationPass()); - printAndVerify("After ARM load / store optimizer"); - } + addPass(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + if (getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 0c80a95..664c992 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,7 +23,6 @@ #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" @@ -102,7 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// ARMLETargetMachine - ARM little endian target machine. /// class ARMLETargetMachine : public ARMTargetMachine { - virtual void anchor(); + void anchor() override; public: ARMLETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -113,7 +112,7 @@ public: /// ARMBETargetMachine - ARM big endian target machine. /// class ARMBETargetMachine : public ARMTargetMachine { - virtual void anchor(); + void anchor() override; public: ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -128,12 +127,12 @@ public: class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. - OwningPtr<ARMBaseInstrInfo> InstrInfo; + std::unique_ptr<ARMBaseInstrInfo> InstrInfo; const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. - OwningPtr<ARMFrameLowering> FrameLowering; + std::unique_ptr<ARMFrameLowering> FrameLowering; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -169,7 +168,7 @@ public: /// ThumbLETargetMachine - Thumb little endian target machine. /// class ThumbLETargetMachine : public ThumbTargetMachine { - virtual void anchor(); + void anchor() override; public: ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -180,10 +179,10 @@ public: /// ThumbBETargetMachine - Thumb big endian target machine. 
/// class ThumbBETargetMachine : public ThumbTargetMachine { - virtual void anchor(); + void anchor() override; public: - ThumbBETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, + ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 3379f85..48238bf 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "ARMSubtarget.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" @@ -31,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - LSDASection = NULL; + LSDASection = nullptr; } AttributesSection = @@ -45,6 +46,10 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { + if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM) + return TargetLoweringObjectFileELF::getTTypeGlobalReference( + GV, Encoding, Mang, TM, MMI, Streamer); + assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 5f8d612..c926421 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -23,7 +23,7 @@ protected: public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), - AttributesSection(NULL) + AttributesSection(nullptr) {} void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index d3b43cd..57df7da 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -14,7 +14,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armtti" #include "ARM.h" #include "ARMTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -23,8 +22,10 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "armtti" + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. 
namespace llvm { void initializeARMTTIPass(PassRegistry &); @@ -42,7 +43,7 @@ class ARMTTI final : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 4be95aa..095955b 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -17,7 +17,6 @@ arm_codegen_TBLGEN_TABLES := \ arm_codegen_SRC_FILES := \ A15SDOptimizer.cpp \ ARMAsmPrinter.cpp \ - ARMAtomicExpandPass.cpp \ ARMBaseInstrInfo.cpp \ ARMBaseRegisterInfo.cpp \ ARMCodeEmitter.cpp \ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 9c57a24..5cdf394 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -13,7 +13,6 @@ #include "MCTargetDesc/ARMArchName.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -23,9 +22,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -345,7 +342,8 @@ public: }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); @@ -416,7 +414,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Token } Kind; - SMLoc StartLoc, EndLoc; + SMLoc StartLoc, EndLoc, AlignmentLoc; SmallVector<unsigned, 8> Registers; struct CCOp { @@ -633,6 +631,12 @@ public: /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getAlignmentLoc - Get the location of the Alignment token of this operand. + SMLoc getAlignmentLoc() const { + assert(Kind == k_Memory && "Invalid access!"); + return AlignmentLoc; + } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -1089,12 +1093,12 @@ public: bool isPostIdxReg() const { return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } - bool isMemNoOffset(bool alignOK = false) const { + bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const { if (!isMem()) return false; // No offset of any kind. - return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && - (alignOK || Memory.Alignment == 0); + return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr && + (alignOK || Memory.Alignment == Alignment); } bool isMemPCRelImm12() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1110,6 +1114,65 @@ public: bool isAlignedMemory() const { return isMemNoOffset(true); } + bool isAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. 
+ return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128or256() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + if (isMemNoOffset(false, 32)) // alignment in bytes for 256-bits is 32. + return true; + return isMemNoOffset(false, 0); + } bool isAddrMode2() const { if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. @@ -1545,7 +1608,10 @@ public: } bool isNEONi16splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(2)) + return false; // Leave that for bytes replication and forbid by default. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1555,7 +1621,10 @@ public: } bool isNEONi32splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Leave that for bytes replication and forbid by default. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; @@ -1567,11 +1636,36 @@ public: (Value >= 0x01000000 && Value <= 0xff000000); } + bool isNEONByteReplicate(unsigned NumBytes) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) + return false; + int64_t Value = CE->getValue(); + if (!Value) + return false; // Don't bother with zero. + + unsigned char B = Value & 0xff; + for (unsigned i = 1; i < NumBytes; ++i) { + Value >>= 8; + if ((Value & 0xff) != B) + return false; + } + return true; + } + bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); } + bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); } bool isNEONi32vmov() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Let it to be classified as byte-replicate case. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. 
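The aligned-memory operand classes introduced a little earlier in this hunk all follow one pattern: the parser records the alignment of an operand such as [r0:64] in bytes, and each class accepts either a specific set of alignments or an omitted specifier (stored as 0), which is what the new Match_*AlignedMemoryRequires* diagnostics later report. A rough standalone sketch of that acceptance check (the function name and the bits-to-bytes conversion are assumptions of the sketch):

// Sketch of the aligned-memory predicate pattern, outside of any LLVM types.
#include <cassert>
#include <initializer_list>

// AlignBytes is the parsed alignment in bytes (0 = omitted); AllowedBits
// lists the access widths, in bits, that the operand class accepts.
static bool alignmentAccepted(unsigned AlignBytes,
                              std::initializer_list<unsigned> AllowedBits) {
  if (AlignBytes == 0) // omitting the alignment is always allowed
    return true;
  for (unsigned Bits : AllowedBits)
    if (AlignBytes == Bits / 8)
      return true;
  return false;
}

int main() {
  // "AlignedMemory64": 8 bytes (64 bits) or omitted.
  assert(alignmentAccepted(8, {64}));
  assert(alignmentAccepted(0, {64}));
  assert(!alignmentAccepted(4, {64})); // would produce the new diagnostic
  // "AlignedMemory64or128or256": 8, 16 or 32 bytes, or omitted.
  assert(alignmentAccepted(16, {64, 128, 256}));
  assert(!alignmentAccepted(2, {64, 128, 256}));
  return 0;
}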
- if (!CE) return false; + if (!CE) + return false; int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. @@ -1612,7 +1706,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. - if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -1926,6 +2020,50 @@ public: Inst.addOperand(MCOperand::CreateImm(Memory.Alignment)); } + void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + void addAddrMode2Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; @@ -2275,6 +2413,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All vmvn instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = ((~Value) & 0xff); + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2289,6 +2440,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. 
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = Value & 0xff; + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2523,7 +2687,8 @@ public: unsigned ShiftImm, unsigned Alignment, bool isNegative, - SMLoc S, SMLoc E) { + SMLoc S, SMLoc E, + SMLoc AlignmentLoc = SMLoc()) { ARMOperand *Op = new ARMOperand(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; @@ -2534,6 +2699,7 @@ public: Op->Memory.isNegative = isNegative; Op->StartLoc = S; Op->EndLoc = E; + Op->AlignmentLoc = AlignmentLoc; return Op; } @@ -2806,7 +2972,7 @@ int ARMAsmParser::tryParseShiftRegister( // The source register for the shift has already been added to the // operand list, so we need to pop it off and combine it into the shifted // register operand instead. - OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val()); + std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val()); if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); @@ -2825,7 +2991,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); - const MCExpr *ShiftExpr = 0; + const MCExpr *ShiftExpr = nullptr; if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; @@ -2855,12 +3021,12 @@ int ARMAsmParser::tryParseShiftRegister( EndLoc = Parser.getTok().getEndLoc(); ShiftReg = tryParseRegister(); if (ShiftReg == -1) { - Error (L, "expected immediate or register in shift operand"); + Error(L, "expected immediate or register in shift operand"); return -1; } } else { - Error (Parser.getTok().getLoc(), - "expected immediate or register in shift operand"); + Error(Parser.getTok().getLoc(), + "expected immediate or register in shift operand"); return -1; } } @@ -4323,8 +4489,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, - 0, 0, false, S, E)); + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, + ARM_AM::no_shift, 0, 0, false, + S, E)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. It's rather odd, but syntactically valid. @@ -4346,6 +4513,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Parser.getTok().is(AsmToken::Colon)) { Parser.Lex(); // Eat the ':'. E = Parser.getTok().getLoc(); + SMLoc AlignmentLoc = Tok.getLoc(); const MCExpr *Expr; if (getParser().parseExpression(Expr)) @@ -4378,9 +4546,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Don't worry about range checking the value here. That's handled by // the is*() predicates. 
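The byte-replication support added above has two halves: isNEONByteReplicate() recognises constants whose bytes are all the same non-zero value, and the addNEON*ByteReplicateOperands() helpers pack that byte (complemented for the vmvn-style form) together with cmode = 0b1110 into the modified-immediate field. A self-contained sketch of the same bit manipulation, with names invented for the example:

// Sketch of the byte-replicate check and cmode=0b1110 packing shown above.
#include <cassert>
#include <cstdint>

// True if the low NumBytes bytes of Value are all the same non-zero byte,
// e.g. 0x7a7a7a7a with NumBytes == 4.
static bool isByteReplicate(int64_t Value, unsigned NumBytes) {
  if (Value == 0)
    return false; // zero is left to the ordinary splat forms
  unsigned char B = Value & 0xff;
  for (unsigned i = 1; i < NumBytes; ++i) {
    Value >>= 8;
    if ((unsigned char)(Value & 0xff) != B)
      return false;
  }
  return true;
}

// Packs the replicated byte (complemented for the inverted/vmvn form)
// together with cmode = 0b1110 into the modified-immediate encoding.
static unsigned encodeReplicatedByte(unsigned Value, bool Invert) {
  unsigned B = (Invert ? ~Value : Value) & 0xff;
  return B | 0xe00; // cmode = 0b1110
}

int main() {
  assert(isByteReplicate(0x7a7a7a7a, 4));
  assert(!isByteReplicate(0x7a007a00, 4));
  assert(encodeReplicatedByte(0x7a7a7a7a, /*Invert=*/false) == 0xe7a);
  assert(encodeReplicatedByte(0x85858585, /*Invert=*/true) == 0xe7a);
  return 0;
}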
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, ARM_AM::no_shift, 0, Align, - false, S, E)); + false, S, E, AlignmentLoc)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. @@ -4471,7 +4639,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum, ShiftType, ShiftImm, 0, isNegative, S, E)); @@ -4926,8 +5094,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || - Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") || - Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || + Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || @@ -5404,21 +5573,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } // GNU Assembler extension (compatibility) - if ((Mnemonic == "ldrd" || Mnemonic == "strd") && !isThumb() && - Operands.size() == 4) { - ARMOperand *Op = static_cast<ARMOperand *>(Operands[2]); - assert(Op->isReg() && "expected register argument"); + if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { + ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]); + ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]); + if (Op3->isMem()) { + assert(Op2->isReg() && "expected register argument"); - unsigned SuperReg = MRI->getMatchingSuperReg( - Op->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); + unsigned SuperReg = MRI->getMatchingSuperReg( + Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); - assert(SuperReg && "expected register pair"); + assert(SuperReg && "expected register pair"); - unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); + unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); - Operands.insert(Operands.begin() + 3, - ARMOperand::CreateReg(PairedReg, Op->getStartLoc(), - Op->getEndLoc())); + Operands.insert(Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, + Op2->getStartLoc(), + Op2->getEndLoc())); + } } // FIXME: As said above, this is all a pretty gross hack. This instruction @@ -5748,6 +5920,30 @@ validateInstruction(MCInst &Inst, return Error(Operands[Op]->getStartLoc(), "branch target out of range"); break; } + case ARM::MOVi16: + case ARM::t2MOVi16: + case ARM::t2MOVTi16: + { + // We want to avoid misleadingly allowing something like "mov r0, <symbol>" + // especially when we turn it into a movw and the expression <symbol> does + // not have a :lower16: or :upper16 as part of the expression. We don't + // want the behavior of silently truncating, which can be unexpected and + // lead to bugs that are difficult to find since this is an easy mistake + // to make. + int i = (Operands[3]->isImm()) ? 
3 : 4; + ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (CE) break; + const MCExpr *E = dyn_cast<MCExpr>(Op->getImm()); + if (!E) break; + const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E); + if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) { + return Error(Op->getStartLoc(), + "immediate expression for mov requires :lower16: or :upper16"); + break; + } + } } return false; @@ -5898,7 +6094,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; - case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; @@ -7860,9 +8056,11 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } -template<> inline bool IsCPSRDead<MCInst>(MCInst* Instr) { +namespace llvm { +template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { return true; // In an assembly source, no need to second-guess } +} static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: @@ -7965,6 +8163,42 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); } + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + case Match_AlignedMemoryRequires64: + case Match_DupAlignedMemoryRequires64: + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + case Match_AlignedMemoryRequires64or128or256: + { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + switch (MatchResult) { + default: + llvm_unreachable("Missing Match_Aligned type"); + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + return Error(ErrorLoc, "alignment must be omitted"); + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + return Error(ErrorLoc, "alignment must be 16 or omitted"); + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + return Error(ErrorLoc, "alignment must be 32 or omitted"); + case Match_AlignedMemoryRequires64: + case Match_DupAlignedMemoryRequires64: + return Error(ErrorLoc, "alignment must be 64 or omitted"); + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + return Error(ErrorLoc, "alignment must be 64, 128 or omitted"); + case Match_AlignedMemoryRequires64or128or256: + return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); + } + } } llvm_unreachable("Implement any new match types added!"); @@ -7972,6 +8206,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /// parseDirective parses the arm specific directives bool 
ARMAsmParser::ParseDirective(AsmToken DirectiveID) { + const MCObjectFileInfo::Environment Format = + getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; + StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return parseLiteralValues(4, DirectiveID.getLoc()); @@ -7989,16 +8227,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") return parseDirectiveUnreq(DirectiveID.getLoc()); - else if (IDVal == ".arch") - return parseDirectiveArch(DirectiveID.getLoc()); - else if (IDVal == ".eabi_attribute") - return parseDirectiveEabiAttr(DirectiveID.getLoc()); - else if (IDVal == ".cpu") - return parseDirectiveCPU(DirectiveID.getLoc()); - else if (IDVal == ".fpu") - return parseDirectiveFPU(DirectiveID.getLoc()); - else if (IDVal == ".fnstart") - return parseDirectiveFnStart(DirectiveID.getLoc()); else if (IDVal == ".fnend") return parseDirectiveFnEnd(DirectiveID.getLoc()); else if (IDVal == ".cantunwind") @@ -8015,12 +8243,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveRegSave(DirectiveID.getLoc(), false); else if (IDVal == ".vsave") return parseDirectiveRegSave(DirectiveID.getLoc(), true); - else if (IDVal == ".inst") - return parseDirectiveInst(DirectiveID.getLoc()); - else if (IDVal == ".inst.n") - return parseDirectiveInst(DirectiveID.getLoc(), 'n'); - else if (IDVal == ".inst.w") - return parseDirectiveInst(DirectiveID.getLoc(), 'w'); else if (IDVal == ".ltorg" || IDVal == ".pool") return parseDirectiveLtorg(DirectiveID.getLoc()); else if (IDVal == ".even") @@ -8029,18 +8251,38 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectivePersonalityIndex(DirectiveID.getLoc()); else if (IDVal == ".unwind_raw") return parseDirectiveUnwindRaw(DirectiveID.getLoc()); - else if (IDVal == ".tlsdescseq") - return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); else if (IDVal == ".movsp") return parseDirectiveMovSP(DirectiveID.getLoc()); - else if (IDVal == ".object_arch") - return parseDirectiveObjectArch(DirectiveID.getLoc()); else if (IDVal == ".arch_extension") return parseDirectiveArchExtension(DirectiveID.getLoc()); else if (IDVal == ".align") return parseDirectiveAlign(DirectiveID.getLoc()); else if (IDVal == ".thumb_set") return parseDirectiveThumbSet(DirectiveID.getLoc()); + + if (!IsMachO) { + if (IDVal == ".arch") + return parseDirectiveArch(DirectiveID.getLoc()); + else if (IDVal == ".cpu") + return parseDirectiveCPU(DirectiveID.getLoc()); + else if (IDVal == ".eabi_attribute") + return parseDirectiveEabiAttr(DirectiveID.getLoc()); + else if (IDVal == ".fpu") + return parseDirectiveFPU(DirectiveID.getLoc()); + else if (IDVal == ".fnstart") + return parseDirectiveFnStart(DirectiveID.getLoc()); + else if (IDVal == ".inst") + return parseDirectiveInst(DirectiveID.getLoc()); + else if (IDVal == ".inst.n") + return parseDirectiveInst(DirectiveID.getLoc(), 'n'); + else if (IDVal == ".inst.w") + return parseDirectiveInst(DirectiveID.getLoc(), 'w'); + else if (IDVal == ".object_arch") + return parseDirectiveObjectArch(DirectiveID.getLoc()); + else if (IDVal == ".tlsdescseq") + return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); + } + return true; } @@ -8121,32 +8363,6 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { if (NextSymbolIsThumb) { getParser().getStreamer().EmitThumbFunc(Symbol); NextSymbolIsThumb = false; - return; - } - - if (!isThumb()) - return; - - const 
MCObjectFileInfo::Environment Format = - getContext().getObjectFileInfo()->getObjectFileType(); - switch (Format) { - case MCObjectFileInfo::IsCOFF: { - const MCSymbolData &SD = - getParser().getStreamer().getOrCreateSymbolData(Symbol); - char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - if (SD.getFlags() & (Type << COFF::SF_TypeShift)) - getParser().getStreamer().EmitThumbFunc(Symbol); - break; - } - case MCObjectFileInfo::IsELF: { - const MCSymbolData &SD = - getParser().getStreamer().getOrCreateSymbolData(Symbol); - if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift)) - getParser().getStreamer().EmitThumbFunc(Symbol); - break; - } - case MCObjectFileInfo::IsMachO: - break; } } @@ -8303,14 +8519,6 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { /// parseDirectiveArch /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".arch directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef Arch = getParser().parseStringToEndOfStatement().trim(); unsigned ID = StringSwitch<unsigned>(Arch) @@ -8334,14 +8542,6 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { /// ::= .eabi_attribute int, int [, "str"] /// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".eabi_attribute directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - int64_t Tag; SMLoc TagLoc; TagLoc = Parser.getTok().getLoc(); @@ -8447,14 +8647,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { /// parseDirectiveCPU /// ::= .cpu str bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".cpu directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); return false; @@ -8463,14 +8655,6 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".fpu directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = StringSwitch<unsigned>(FPU) @@ -8490,14 +8674,6 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { /// parseDirectiveFnStart /// ::= .fnstart bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".fnstart directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (UC.hasFnStart()) { Error(L, ".fnstart starts before the end of previous one"); UC.emitFnStartLocNotes(); @@ -8777,14 +8953,6 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] 
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(Loc, ".inst directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - int Width; if (isThumb()) { @@ -9033,14 +9201,6 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { /// parseDirectiveTLSDescSeq /// ::= .tlsdescseq tls-variable bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".tlsdescseq directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (getLexer().isNot(AsmToken::Identifier)) { TokError("expected variable after '.tlsdescseq' directive"); Parser.eatToEndOfStatement(); @@ -9128,14 +9288,6 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// parseDirectiveObjectArch /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".object_arch directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); @@ -9221,36 +9373,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { Lex(); MCSymbol *Alias = getContext().GetOrCreateSymbol(Name); - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) { - MCSymbol *Sym = getContext().LookupSymbol(SRE->getSymbol().getName()); - if (!Sym->isDefined()) { - getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); - getStreamer().EmitAssignment(Alias, Value); - return false; - } - - const MCObjectFileInfo::Environment Format = - getContext().getObjectFileInfo()->getObjectFileType(); - switch (Format) { - case MCObjectFileInfo::IsCOFF: { - char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - getStreamer().EmitCOFFSymbolType(Type); - // .set values are always local in COFF - getStreamer().EmitSymbolAttribute(Alias, MCSA_Local); - break; - } - case MCObjectFileInfo::IsELF: - getStreamer().EmitSymbolAttribute(Alias, MCSA_ELF_TypeFunction); - break; - case MCObjectFileInfo::IsMachO: - break; - } - } - - // FIXME: set the function as being a thumb function via the assembler - getStreamer().EmitThumbFunc(Alias); - getStreamer().EmitAssignment(Alias, Value); - + getTargetStreamer().emitThumbSet(Alias, Value); return false; } @@ -9365,8 +9488,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, int64_t Value; if (!SOExpr->EvaluateAsAbsolute(Value)) return Match_Success; - assert((Value >= INT32_MIN && Value <= INT32_MAX) && - "expression value must be representiable in 32 bits"); + assert((Value >= INT32_MIN && Value <= UINT32_MAX) && + "expression value must be representable in 32 bits"); } break; case MCK_GPRPair: diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 8e14883..9b5fa75 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -17,7 +17,6 @@ add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen A15SDOptimizer.cpp ARMAsmPrinter.cpp - ARMAtomicExpandPass.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp diff --git 
a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 9e40381..4d4038d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-disassembler" - #include "llvm/MC/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -29,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "arm-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { @@ -90,8 +90,8 @@ class ARMDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ARMDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ARMDisassembler() { @@ -109,8 +109,8 @@ class ThumbDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ThumbDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ThumbDisassembler() { @@ -400,12 +400,16 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ARMDisassembler(STI); +static MCDisassembler *createARMDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ARMDisassembler(STI, Ctx); } -static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ThumbDisassembler(STI); +static MCDisassembler *createThumbDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ThumbDisassembler(STI, Ctx); } DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index da3fe01..e4b785d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -23,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1db517f..7acd9cc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -306,8 +306,36 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } +static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { + if (IsLittleEndian) { + // Note that the halfwords are stored high first and low second in thumb; + // so we need to swap the fixup value here to map properly. 
+ uint32_t Swapped = (Value & 0xFFFF0000) >> 16; + Swapped |= (Value & 0x0000FFFF) << 16; + return Swapped; + } + else + return Value; +} + +static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, + bool IsLittleEndian) { + uint32_t Value; + + if (IsLittleEndian) { + Value = (SecondHalf & 0xFFFF) << 16; + Value |= (FirstHalf & 0xFFFF); + } else { + Value = (SecondHalf & 0xFFFF); + Value |= (FirstHalf & 0xFFFF) << 16; + } + + return Value; +} + static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx) { + bool IsPCRel, MCContext *Ctx, + bool IsLittleEndian) { unsigned Kind = Fixup.getKind(); switch (Kind) { default: @@ -316,6 +344,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: return Value; + case FK_SecRel_2: + return Value; + case FK_SecRel_4: + return Value; case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; @@ -342,9 +374,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // inst{14-12} = Mid3; // inst{7-0} = Lo8; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(Value, IsLittleEndian); } case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. @@ -364,11 +394,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, // but with 16-bit halfwords swapped. - if (Kind == ARM::fixup_t2_ldst_pcrel_12) { - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_ldst_pcrel_12) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -401,9 +428,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_condbranch: @@ -434,9 +459,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1FF800) << 5; // imm6 field out |= (Value & 0x0007FF); // imm11 field - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_t2_condbranch: { Value = Value - 4; @@ -449,9 +472,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1F800) << 5; // imm6 field out |= (Value & 0x007FF); // imm11 field - uint32_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { // The value doesn't encode the low bit (always zero) and is offset by @@ -475,13 +496,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t imm10Bits = (offset & 0x1FF800) >> 11; uint32_t imm11Bits = (offset & 0x000007FF); - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | (uint16_t)imm11Bits); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return 
Binary; + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { // The value doesn't encode the low two bits (always zero) and is offset by @@ -508,13 +526,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t imm10HBits = (offset & 0xFFC00) >> 10; uint32_t imm10LBits = (offset & 0x3FF); - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | ((uint16_t)imm10LBits) << 1); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return Binary; + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: // Offset by 4, and don't encode the low two bits. Two bytes of that @@ -566,11 +581,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords // swapped. - if (Kind == ARM::fixup_t2_pcrel_10) { - uint32_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_pcrel_10) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -603,7 +615,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the offset when the destination has the same MCFragment. if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); - MCSymbolData &SymData = Asm.getSymbolData(Sym); + const MCSymbolData &SymData = Asm.getSymbolData(Sym); IsResolved = (SymData.getFragment() == DF); } // We must always generate a relocation for BL/BLX instructions if we have @@ -618,7 +630,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // Try to get the encoded value for the fixup as-if we're mapping it into // the instruction. This allows adjustFixupValue() to issue a diagnostic // if the value aren't invalid. - (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext()); + (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), + IsLittleEndian); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -662,6 +675,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: return 4; + + case FK_SecRel_2: + return 2; + case FK_SecRel_4: + return 4; } } @@ -716,7 +734,7 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr); + Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); @@ -724,8 +742,11 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, // Used to point to big endian bytes. unsigned FullSizeBytes; - if (!IsLittleEndian) + if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); + assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); + } // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. 
The Value has been "split up" into the appropriate @@ -737,6 +758,15 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } namespace { +// FIXME: This should be in a separate file. +class ARMWinCOFFAsmBackend : public ARMAsmBackend { +public: + ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) + : ARMAsmBackend(T, Triple, true) { } + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; // FIXME: This should be in a separate file. // ELF is an ELF of course... @@ -777,7 +807,9 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, bool isLittle) { Triple TheTriple(TT); - if (TheTriple.isOSBinFormatMachO()) { + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { MachO::CPUSubTypeARM CS = StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) @@ -792,15 +824,14 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, return new DarwinARMAsmBackend(T, TT, CS); } - -#if 0 - // FIXME: Introduce yet another checker but assert(0). - if (TheTriple.isOSBinFormatCOFF()) - assert(0 && "Windows not supported on ARM"); -#endif - - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return new ARMWinCOFFAsmBackend(T, TT); + case Triple::ELF: + assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + } } MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a4661b1..1c84263 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -74,7 +74,7 @@ unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant(); + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type = 0; if (IsPCRel) { @@ -91,6 +91,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; } break; case ARM::fixup_arm_blx: @@ -167,6 +170,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 5a01d26..a4d13ed 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" @@ -62,7 
+63,7 @@ static const char *GetFPUName(unsigned ID) { #define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME; #include "ARMFPUName.def" } - return NULL; + return nullptr; } static const char *GetArchName(unsigned ID) { @@ -75,7 +76,7 @@ static const char *GetArchName(unsigned ID) { #define ARM_ARCH_ALIAS(NAME, ID) /* empty */ #include "ARMArchName.def" } - return NULL; + return nullptr; } static const char *GetArchDefaultCPUName(unsigned ID) { @@ -88,7 +89,7 @@ static const char *GetArchDefaultCPUName(unsigned ID) { #define ARM_ARCH_ALIAS(NAME, ID) /* empty */ #include "ARMArchName.def" } - return NULL; + return nullptr; } static unsigned GetArchDefaultCPUArch(unsigned ID) { @@ -139,6 +140,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void finishAttributeSection() override; void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; public: ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, @@ -260,6 +262,10 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); } +void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n'; +} + void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { OS << "\t.inst"; if (Suffix) @@ -310,7 +316,7 @@ private: for (size_t i = 0; i < Contents.size(); ++i) if (Contents[i].Tag == Attribute) return &Contents[i]; - return 0; + return nullptr; } void setAttributeItem(unsigned Attribute, unsigned Value, @@ -406,8 +412,10 @@ private: void emitFPU(unsigned FPU) override; void emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; + void emitLabel(MCSymbol *Symbol) override; void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; size_t calculateContentSize() const; @@ -415,7 +423,7 @@ public: ARMTargetELFStreamer(MCStreamer &S) : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH), - AttributeSection(0) {} + AttributeSection(nullptr) {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -531,7 +539,8 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) override { EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } @@ -600,12 +609,8 @@ private: } void EmitThumbFunc(MCSymbol *Func) override { - // FIXME: Anything needed here to flag the function as thumb? 
- getAssembler().setIsThumbFunc(Func); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); - SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction); } // Helper functions for ARM exception handling directives @@ -980,10 +985,35 @@ void ARMTargetELFStreamer::finishAttributeSection() { Contents.clear(); FPU = ARM::INVALID_FPU; } + +void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { + ARMELFStreamer &Streamer = getStreamer(); + if (!Streamer.IsThumb) + return; + + const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol); + if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift)) + Streamer.EmitThumbFunc(Symbol); +} + void ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { getStreamer().EmitFixup(S, FK_Data_4); } + +void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) { + const MCSymbol &Sym = SRE->getSymbol(); + if (!Sym.isDefined()) { + getStreamer().EmitAssignment(Symbol, Value); + return; + } + } + + getStreamer().EmitThumbFunc(Symbol); + getStreamer().EmitAssignment(Symbol, Value); +} + void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) { getStreamer().emitInst(Inst, Suffix); } @@ -1012,7 +1042,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = NULL; + const MCSectionELF *EHSection = nullptr; if (const MCSymbol *Group = FnSection.getGroup()) { EHSection = getContext().getELFSection( EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, @@ -1049,9 +1079,9 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { } void ARMELFStreamer::Reset() { - ExTab = NULL; - FnStart = NULL; - Personality = NULL; + ExTab = nullptr; + FnStart = nullptr; + Personality = nullptr; PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX; FPReg = ARM::SP; FPOffset = 0; @@ -1065,7 +1095,7 @@ void ARMELFStreamer::Reset() { } void ARMELFStreamer::emitFnStart() { - assert(FnStart == 0); + assert(FnStart == nullptr); FnStart = getContext().CreateTempSymbol(); EmitLabel(FnStart); } @@ -1104,11 +1134,14 @@ void ARMELFStreamer::emitFnEnd() { // the second word of exception index table entry. The size of the unwind // opcodes should always be 4 bytes. 
assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 && - "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); + "Compact model must use __aeabi_unwind_cpp_pr0 as personality"); assert(Opcodes.size() == 4u && - "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); - EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()), - Opcodes.size())); + "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4"); + uint64_t Intval = Opcodes[0] | + Opcodes[1] << 8 | + Opcodes[2] << 16 | + Opcodes[3] << 24; + EmitIntValue(Intval, Opcodes.size()); } // Switch to the section containing FnStart @@ -1180,8 +1213,15 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { } // Emit unwind opcodes - EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), - Opcodes.size())); + assert((Opcodes.size() % 4) == 0 && + "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be multiple of 4"); + for (unsigned I = 0; I != Opcodes.size(); I += 4) { + uint64_t Intval = Opcodes[I] | + Opcodes[I + 1] << 8 | + Opcodes[I + 2] << 16 | + Opcodes[I + 3] << 24; + EmitIntValue(Intval, 4); + } // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted @@ -1283,13 +1323,11 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset, namespace llvm { MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm); return S; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index b7f96e0..7a19208 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -25,7 +25,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -50,7 +50,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { // ".comm align is in bytes but .align is pow-2." 
AlignmentIsInBytes = false; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -59,7 +59,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::ARM; + switch (TheTriple.getOS()) { + case Triple::NetBSD: + ExceptionsType = ExceptionHandling::DwarfCFI; + break; + default: + ExceptionsType = ExceptionHandling::ARM; + break; + } // foo(plt) instead of foo@plt UseParensForSymbolVariant = true; @@ -89,6 +96,7 @@ void ARMCOFFMCAsmInfoGNU::anchor() { } ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { AlignmentIsInBytes = false; + HasSingleParameterDotFile = true; CommentString = "@"; Code16Directive = ".code\t16"; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index beaf6a4..51cfa0a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -35,13 +35,13 @@ namespace llvm { }; class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor(); + void anchor() override; public: explicit ARMCOFFMCAsmInfoMicrosoft(); }; class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { - void anchor(); + void anchor() override; public: explicit ARMCOFFMCAsmInfoGNU(); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5564e0a..5b51a52 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); @@ -1036,16 +1037,17 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, : ARM::fixup_arm_movw_lo16); break; } + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; } // If the expression doesn't have :upper16: or :lower16: on it, - // it's just a plain immediate expression, and those evaluate to + // it's just a plain immediate expression, previously those evaluated to // the lower 16 bits of the expression regardless of whether - // we have a movt or a movw. - Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16 - : ARM::fixup_arm_movw_lo16); - Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); + // we have a movt or a movw, but that led to misleading results. + // This is now disallowed in the AsmParser in validateInstruction() + // so this should never happen. 
+ assert(0 && "expression without :upper16: or :lower16:"); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index fc8505b..87ea875 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armmcexpr" #include "ARMMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" using namespace llvm; +#define DEBUG_TYPE "armmcexpr" + const ARMMCExpr* ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 949a3d5..04d63a7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -106,9 +107,11 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned SubVer = TT[Idx]; if (SubVer == '8') { if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP, - // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc"; + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; else // Use CPU to figure out the exact features ARMArchFeature = "+v8"; @@ -245,7 +248,7 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { } unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); - MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(0, Reg, 0)); + MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); return MAI; } @@ -273,18 +276,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - if (TheTriple.isOSBinFormatMachO()) { + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false); new ARMTargetStreamer(*S); return S; } - - if (TheTriple.isOSWindows()) { - llvm_unreachable("ARM does not support Windows COFF format"); + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); + case Triple::ELF: + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + TheTriple.getArch() == Triple::thumb); } - - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, - TheTriple.getArch() == Triple::thumb); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, @@ -295,7 +300,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) return new ARMInstPrinter(MAI, MII, MRI, STI); - return 0; + return nullptr; } static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, diff 
--git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index e81876f..8853a8c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -47,8 +47,7 @@ namespace ARM_MC { } MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); @@ -78,6 +77,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which +/// will generate a PE/COFF object file. +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS); + /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, uint8_t OSABI, @@ -89,6 +93,8 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); +/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); /// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3bf5cf1..ecfa4e5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -156,7 +156,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -170,7 +170,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, FixedValue += SecAddr; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -206,11 +206,11 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // The thumb bit shouldn't be set in the 'other-half' bit of the // relocation, but it will be set in FixedValue if the base symbol // is a thumb function. Clear it out here. - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough @@ -259,7 +259,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -272,7 +272,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -378,7 +378,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Get the symbol data, if any. - MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index fdc0ed7..e3cfb05 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -109,7 +109,7 @@ ConstantPool * AssemblerConstantPools::getConstantPool(const MCSection *Section) { ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); if (CP == ConstantPools.end()) - return 0; + return nullptr; return &CP->second; } @@ -246,3 +246,7 @@ void ARMTargetStreamer::AnnotateTLSDescriptorSequence( const MCSymbolRefExpr *SRE) { llvm_unreachable("unimplemented"); } + +void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + llvm_unreachable("unimplemented"); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp new file mode 100644 index 0000000..d31f1f4 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -0,0 +1,82 @@ +//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + ARMWinCOFFObjectWriter(bool Is64Bit) + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { + assert(!Is64Bit && "AArch64 support not yet implemented"); + } + virtual ~ARMWinCOFFObjectWriter() { } + + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsCrossSection) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT && + "AArch64 support not yet implemented"); + + MCSymbolRefExpr::VariantKind Modifier = + Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + switch (static_cast<unsigned>(Fixup.getKind())) { + default: llvm_unreachable("unsupported relocation type"); + case FK_Data_4: + switch (Modifier) { + case MCSymbolRefExpr::VK_COFF_IMGREL32: + return COFF::IMAGE_REL_ARM_ADDR32NB; + case MCSymbolRefExpr::VK_SECREL: + return COFF::IMAGE_REL_ARM_SECREL; + default: + return COFF::IMAGE_REL_ARM_ADDR32; + } + case FK_SecRel_2: + return COFF::IMAGE_REL_ARM_SECTION; + case FK_SecRel_4: + return COFF::IMAGE_REL_ARM_SECREL; + case ARM::fixup_t2_condbranch: + return COFF::IMAGE_REL_ARM_BRANCH20T; + case ARM::fixup_t2_uncondbranch: + return COFF::IMAGE_REL_ARM_BRANCH24T; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + return COFF::IMAGE_REL_ARM_BLX23T; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movt_hi16: + return COFF::IMAGE_REL_ARM_MOV32T; + } +} + +bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; +} +} + +namespace llvm { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { + MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); + return createWinCOFFObjectWriter(MOTW, OS); +} +} + diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp new file mode 100644 index 0000000..b344ced --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -0,0 +1,46 @@ +//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMMCTargetDesc.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFStreamer : public MCWinCOFFStreamer { +public: + ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_ostream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) { } + + void EmitAssemblerFlag(MCAssemblerFlag Flag) override; + void EmitThumbFunc(MCSymbol *Symbol) override; +}; + +void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: llvm_unreachable("not implemented"); + case MCAF_SyntaxUnified: + case MCAF_Code16: + break; + } +} + +void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { + getAssembler().setIsThumbFunc(Symbol); +} +} + +namespace llvm { +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS) { + return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS); +} +} + diff --git a/lib/Target/ARM/MCTargetDesc/Android.mk b/lib/Target/ARM/MCTargetDesc/Android.mk index 074d29e..a5827f7 100644 --- a/lib/Target/ARM/MCTargetDesc/Android.mk +++ b/lib/Target/ARM/MCTargetDesc/Android.mk @@ -17,7 +17,9 @@ arm_mc_desc_SRC_FILES := \ ARMMachObjectWriter.cpp \ ARMMachORelocationInfo.cpp \ ARMTargetStreamer.cpp \ - ARMUnwindOpAsm.cpp + ARMUnwindOpAsm.cpp \ + ARMWinCOFFObjectWriter.cpp \ + ARMWinCOFFStreamer.cpp \ # For the host # ===================================================== diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 06812d4..9582e8c 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,14 +1,16 @@ 
add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp ARMELFObjectWriter.cpp + ARMELFObjectWriter.cpp ARMELFStreamer.cpp + ARMMachObjectWriter.cpp + ARMMachORelocationInfo.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp ARMMCTargetDesc.cpp - ARMMachObjectWriter.cpp - ARMELFObjectWriter.cpp ARMTargetStreamer.cpp ARMUnwindOpAsm.cpp - ARMMachORelocationInfo.cpp + ARMWinCOFFObjectWriter.cpp + ARMWinCOFFStreamer.cpp ) diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 80af859..f6d24e9 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "mlx-expansion" + static cl::opt<bool> ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); static cl::opt<unsigned> @@ -73,7 +74,7 @@ namespace { } void MLxExpansion::clearStack() { - std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0); + std::fill(LastMIs, LastMIs + 4, nullptr); MIIdx = 0; } @@ -88,7 +89,7 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { // real definition MI. This is important for _sfp instructions. unsigned Reg = MI->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; + return nullptr; MachineBasicBlock *MBB = MI->getParent(); MachineInstr *DefMI = MRI->getVRegDef(Reg); @@ -352,7 +353,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { if (Domain == ARMII::DomainGeneral) { if (++Skip == 2) // Assume dual issues of non-VFP / NEON instructions. - pushStack(0); + pushStack(nullptr); } else { Skip = 0; diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index a64707e..f4d9be3 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code. //===---------------------------------------------------------------------===// -Add ldmia, stmia support. - -//===---------------------------------------------------------------------===// - Thumb load / store address mode offsets are scaled. The values kept in the instruction operands are pre-scale values. This probably ought to be changed to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions. 
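The swapHalfWords() and joinHalfWords() helpers added to ARMAsmBackend.cpp above replace several open-coded copies of the same halfword shuffle in adjustFixupValue(). As a reading aid, here is a minimal standalone C++ sketch of the bit manipulation involved; it is not LLVM code, and the helper names are reused purely for illustration. Per the comment in the patch, Thumb stores the high halfword of a 32-bit instruction first, so on a little-endian object the two 16-bit halves of a fixup value have to be exchanged before they are OR'ed into the instruction bytes, while a big-endian object can take the value as-is (which is why the patch threads an IsLittleEndian flag through adjustFixupValue()).

    // Standalone sketch, not LLVM code: the halfword handling centralized by
    // swapHalfWords()/joinHalfWords() in the ARMAsmBackend.cpp hunks above.
    #include <cstdint>
    #include <cstdio>

    static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
      if (!IsLittleEndian)
        return Value;                        // big endian: high halfword already first
      return (Value >> 16) | (Value << 16);  // exchange the two 16-bit halves
    }

    static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
                                  bool IsLittleEndian) {
      // FirstHalf is the halfword that appears first in the instruction stream.
      return IsLittleEndian
                 ? ((SecondHalf & 0xFFFF) << 16) | (FirstHalf & 0xFFFF)
                 : ((FirstHalf & 0xFFFF) << 16) | (SecondHalf & 0xFFFF);
    }

    int main() {
      // Illustrative values only: the two halves of a Thumb-2 BL-style encoding.
      printf("swapped: 0x%08x\n", (unsigned)swapHalfWords(0xF000D000u, true));
      printf("joined:  0x%08x\n", (unsigned)joinHalfWords(0xF000u, 0xD000u, true));
      return 0;
    }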
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 2224652..be29dc5 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -293,7 +293,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -328,7 +328,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 93e2b5a..0c0abbe 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -56,7 +56,7 @@ public: unsigned Reg) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; }; } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 406dbe0..edb9ff3 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "thumb2-it" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" @@ -19,6 +18,8 @@ #include "llvm/CodeGen/MachineInstrBundle.h" using namespace llvm; +#define DEBUG_TYPE "thumb2-it" + STATISTIC(NumITs, "Number of IT blocks inserted"); STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 04b83fb..6267ecf 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" @@ -25,6 +24,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "t2-reduce-size" + STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); @@ -915,15 +916,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); - MachineInstr *BundleMI = 0; + MachineInstr *BundleMI = nullptr; - CPSRDef = 0; + CPSRDef = nullptr; HighLatencyCPSR = false; // Check predecessors for the latest CPSRDef. - for (MachineBasicBlock::pred_iterator - I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) { - const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()]; + for (auto *Pred : MBB.predecessors()) { + const MBBInfo &PInfo = BlockInfo[Pred->getNumber()]; if (!PInfo.Visited) { // Since blocks are visited in RPO, this must be a back-edge. 
continue; @@ -984,7 +984,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); if (MI->isCall()) { // Calls don't really set CPSR. - CPSRDef = 0; + CPSRDef = nullptr; HighLatencyCPSR = false; IsSelfLoop = false; } else if (DefCPSR) {
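One behavioral change worth noting from the ARMELFStreamer.cpp hunks above: the unwind opcodes are no longer written with EmitBytes() as a raw byte string; emitFnEnd() and FlushUnwindOpcodes() now pack every four opcode bytes into a 32-bit word and hand it to EmitIntValue(), so the streamer, not the in-memory buffer order, decides how the word lands in the object file. The following is a small self-contained sketch of that packing step only; packUnwindOpcodes() and the opcode bytes are hypothetical and not part of the LLVM API.

    // Standalone sketch, not LLVM code: pack unwind opcode bytes into the
    // 32-bit words that the patched streamer emits via EmitIntValue().
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Assumes Opcodes.size() is a multiple of 4, as the patch asserts.
    static std::vector<uint32_t>
    packUnwindOpcodes(const std::vector<uint8_t> &Opcodes) {
      std::vector<uint32_t> Words;
      for (size_t I = 0; I + 3 < Opcodes.size(); I += 4)
        Words.push_back(uint32_t(Opcodes[I]) |
                        uint32_t(Opcodes[I + 1]) << 8 |
                        uint32_t(Opcodes[I + 2]) << 16 |
                        uint32_t(Opcodes[I + 3]) << 24);
      return Words;
    }

    int main() {
      // Hypothetical opcode bytes, for illustration only.
      std::vector<uint8_t> Opcodes = {0xB0, 0xB0, 0xB0, 0x80};
      for (uint32_t W : packUnwindOpcodes(Opcodes))
        printf("EmitIntValue(0x%08x, 4)\n", (unsigned)W);
      return 0;
    }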