author    Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit    dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree      dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib/Target/ARM
parent    220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 17
-rw-r--r--  lib/Target/ARM/ARM.h | 2
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 163
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r--  lib/Target/ARM/ARMAtomicExpandPass.cpp | 406
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 73
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 113
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 3
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 21
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 32
-rw-r--r--  lib/Target/ARM/ARMFeatures.h | 6
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 129
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 2
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.h | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 126
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 888
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 28
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 178
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 1326
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 25
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 30
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 305
-rw-r--r--  lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 8
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 4
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 8
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 17
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 2
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/Android.mk | 1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 485
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 129
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 92
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 29
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 10
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 82
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 46
-rw-r--r--  lib/Target/ARM/MCTargetDesc/Android.mk | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 9
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 4
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 14
67 files changed, 3197 insertions, 1944 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 28ea879..94faf6f 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "a15-sd-optimizer"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
@@ -39,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "a15-sd-optimizer"
+
namespace {
struct A15SDOptimizer : public MachineFunctionPass {
static char ID;
@@ -90,7 +91,7 @@ namespace {
unsigned createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL);
-
+
//
// Various property checkers
//
@@ -259,7 +260,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
if (DPRMI && SPRMI) {
// See if the first operand of this insert_subreg is IMPLICIT_DEF
MachineInstr *ECDef = elideCopies(DPRMI);
- if (ECDef != 0 && ECDef->isImplicitDef()) {
+ if (ECDef && ECDef->isImplicitDef()) {
// Another corner case - if we're inserting something that is purely
// a subreg copy of a DPR, just use that DPR.
@@ -348,10 +349,10 @@ MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
if (!MI->isFullCopy())
return MI;
if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
- return NULL;
+ return nullptr;
MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!Def)
- return NULL;
+ return nullptr;
return elideCopies(Def);
}
@@ -435,7 +436,7 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
Out)
.addReg(Reg)
.addImm(Lane));
-
+
return Out;
}
@@ -601,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
// lane, and the other lane(s) of the DPR/QPR register
// that we are inserting in are undefined, use the
- // original DPR/QPR value.
+ // original DPR/QPR value.
// * Otherwise, fall back on the same stategy as COPY.
//
// * REG_SEQUENCE: * If all except one of the input operands are
@@ -693,7 +694,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
MI != ME;) {
Modified |= runOnInstruction(MI++);
}
-
+
}
for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4412b45..55df29c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -49,8 +49,6 @@ FunctionPass *createThumb2SizeReductionPass();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
-FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM);
-
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 0fa865f..55e9fe5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "ARMAsmPrinter.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
@@ -45,6 +44,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -55,6 +55,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
// of the function.
@@ -85,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
? MCSymbolRefExpr::VK_ARM_TARGET1
: MCSymbolRefExpr::VK_None),
OutContext);
-
+
OutStreamer.EmitValue(E, Size);
}
@@ -96,7 +98,28 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AFI = MF.getInfo<ARMFunctionInfo>();
MCP = MF.getConstantPool();
- return AsmPrinter::runOnMachineFunction(MF);
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF()) {
+ bool Internal = MF.getFunction()->hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer.EmitCOFFSymbolType(Type);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
}
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
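The runOnMachineFunction override added above now emits an explicit COFF symbol definition (a .def/.scl/.type/.endef sequence) before the usual function header and body on Windows-on-ARM targets. As a rough standalone sketch of how the storage class and type values are composed, using the standard PE/COFF constants (IMAGE_SYM_CLASS_EXTERNAL = 2, IMAGE_SYM_CLASS_STATIC = 3, IMAGE_SYM_DTYPE_FUNCTION = 2, complex type shifted left by 4); this mirrors the values LLVM pulls from llvm/Support/COFF.h but is not the streamer API itself:

    #include <cstdio>

    // PE/COFF symbol-table constants (same values the pass gets from COFF.h).
    constexpr int IMAGE_SYM_CLASS_EXTERNAL = 2;
    constexpr int IMAGE_SYM_CLASS_STATIC   = 3;
    constexpr int IMAGE_SYM_DTYPE_FUNCTION = 2;
    constexpr int SCT_COMPLEX_TYPE_SHIFT   = 4;

    // Approximate shape of the directives BeginCOFFSymbolDef ... EndCOFFSymbolDef
    // produce for a function symbol.
    void emitCOFFSymbolDef(const char *Name, bool InternalLinkage) {
      int Scl  = InternalLinkage ? IMAGE_SYM_CLASS_STATIC : IMAGE_SYM_CLASS_EXTERNAL;
      int Type = IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT; // == 32
      std::printf(".def %s\n  .scl %d\n  .type %d\n  .endef\n", Name, Scl, Type);
    }

    int main() { emitCOFFSymbolDef("_foo", /*InternalLinkage=*/false); }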
@@ -239,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ARM::GPRPairRegClass.contains(RegBegin)) {
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
- O << ARMInstPrinter::getRegisterName(Reg0) << ", ";;
+ O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
}
O << ARMInstPrinter::getRegisterName(RegBegin);
@@ -383,7 +406,7 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
// If either end mode is unknown (EndInfo == NULL) or different than
// the start mode, then restore the start mode.
const bool WasThumb = isThumb(StartInfo);
- if (EndInfo == NULL || WasThumb != isThumb(*EndInfo)) {
+ if (!EndInfo || WasThumb != isThumb(*EndInfo)) {
OutStreamer.EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
}
}
@@ -456,6 +479,29 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
emitAttributes();
}
+static void
+emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
+ MachineModuleInfoImpl::StubValueTy &MCSym) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(StubLabel);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(
+ MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ 4 /*size*/);
+}
+
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetMachO()) {
@@ -472,27 +518,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
-
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers need to be indirect and pc-rel. We accomplish this by
- // using NLPs; however, sometimes the types are local to the file.
- // We need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/);
- }
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -500,17 +528,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/);
- }
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
Stubs.clear();
OutStreamer.AddBlankLine();
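Both the ordinary and the hidden-visibility stub lists now go through the emitNonLazySymbolPointer helper introduced earlier in this file, and the hidden-visibility stubs are emitted into the non-lazy symbol pointer section instead of the generic data section. As a sketch of the directive pattern one such Mach-O entry expands to (the symbol names here are illustrative only):

    #include <cstdio>

    // Shape of one entry in the Mach-O non-lazy symbol pointer section, as the
    // helper above lays it out.
    void emitNonLazyPointer(const char *StubLabel, const char *Target,
                            bool ExternalToTU) {
      std::printf("%s:\n", StubLabel);                 // L_foo$non_lazy_ptr:
      std::printf("\t.indirect_symbol %s\n", Target);  //   .indirect_symbol _foo
      if (ExternalToTU)
        std::printf("\t.long 0\n");          // dyld fills the pointer at load time
      else
        std::printf("\t.long %s\n", Target); // local symbol: resolved statically
    }

    int main() {
      emitNonLazyPointer("L_foo$non_lazy_ptr", "_foo", /*ExternalToTU=*/true);
    }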
@@ -523,6 +545,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // Emit a .data.rel section containing any stubs that were created.
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (auto &stub: Stubs) {
+ OutStreamer.EmitLabel(stub.first);
+ OutStreamer.EmitSymbolValue(stub.second.getPointer(),
+ TD->getPointerSize(0));
+ }
+ Stubs.clear();
+ }
+ }
}
//===----------------------------------------------------------------------===//
@@ -575,7 +619,7 @@ void ARMAsmPrinter::emitAttributes() {
getArchForCPU(CPUString, Subtarget));
// Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
+ // profile is not applicable (e.g. pre v7, or cross-profile code)".
if (Subtarget->hasV7Ops()) {
if (Subtarget->isAClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
@@ -627,6 +671,20 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitFPU(ARM::VFPV2);
}
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // PIC specific attributes.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+ ARMBuildAttrs::AddressRWPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
+ ARMBuildAttrs::AddressROPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressGOT);
+ } else {
+ // Allow direct addressing of imported data for all other relocation models.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressDirect);
+ }
+
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
@@ -723,7 +781,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
MachineModuleInfoImpl::StubValueTy &StubSym =
GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
MMIMachO.getGVStubEntry(MCSym);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
return MCSym;
@@ -971,7 +1029,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
RegList.push_back(SrcReg);
break;
}
- ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
} else {
// Changes of stack / frame pointer.
if (SrcReg == ARM::SP) {
@@ -1016,18 +1075,20 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
}
- if (DstReg == FramePtr && FramePtr != ARM::SP)
- // Set-up of the frame pointer. Positive values correspond to "add"
- // instruction.
- ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
- else if (DstReg == ARM::SP) {
- // Change of SP by an offset. Positive values correspond to "sub"
- // instruction.
- ATS.emitPad(Offset);
- } else {
- // Move of SP to a register. Positive values correspond to an "add"
- // instruction.
- ATS.emitMovSP(DstReg, -Offset);
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ ATS.emitPad(Offset);
+ } else {
+ // Move of SP to a register. Positive values correspond to an "add"
+ // instruction.
+ ATS.emitMovSP(DstReg, -Offset);
+ }
}
} else if (DstReg == ARM::SP) {
MI->dump();
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 46c2626..7c103c6 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -47,16 +47,17 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
bool InConstantPool;
public:
explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- }
+ : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
const char *getPassName() const override {
return "ARM Assembly / Object Emitter";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp
deleted file mode 100644
index 18e0783..0000000
--- a/lib/Target/ARM/ARMAtomicExpandPass.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass (at IR level) to replace atomic instructions with
-// appropriate (intrinsic-based) ldrex/strex loops.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-atomic-expand"
-#include "ARM.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-namespace {
- class ARMAtomicExpandPass : public FunctionPass {
- const TargetLowering *TLI;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
- : FunctionPass(ID), TLI(TM->getTargetLowering()) {}
-
- bool runOnFunction(Function &F) override;
- bool expandAtomicInsts(Function &F);
-
- bool expandAtomicLoad(LoadInst *LI);
- bool expandAtomicStore(StoreInst *LI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
- AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
- void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
-
- /// Perform a load-linked operation on Addr, returning a "Value *" with the
- /// corresponding pointee type. This may entail some non-trivial operations
- /// to truncate or reconstruct illegal types since intrinsics must be legal
- Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);
-
- /// Perform a store-conditional operation to Addr. Return the status of the
- /// store: 0 if the it succeeded, non-zero otherwise.
- Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
- AtomicOrdering Ord);
-
- /// Return true if the given (atomic) instruction should be expanded by this
- /// pass.
- bool shouldExpandAtomic(Instruction *Inst);
- };
-}
-
-char ARMAtomicExpandPass::ID = 0;
-
-FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
- return new ARMAtomicExpandPass(TM);
-}
-
-bool ARMAtomicExpandPass::runOnFunction(Function &F) {
- SmallVector<Instruction *, 1> AtomicInsts;
-
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (BasicBlock &BB : F)
- for (Instruction &Inst : BB) {
- if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
- (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
- (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
- AtomicInsts.push_back(&Inst);
- }
-
- bool MadeChange = false;
- for (Instruction *Inst : AtomicInsts) {
- if (!shouldExpandAtomic(Inst))
- continue;
-
- if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
- MadeChange |= expandAtomicRMW(AI);
- else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
- MadeChange |= expandAtomicCmpXchg(CI);
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- MadeChange |= expandAtomicLoad(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- MadeChange |= expandAtomicStore(SI);
- else
- llvm_unreachable("Unknown atomic instruction");
- }
-
- return MadeChange;
-}
-
-bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
- // Load instructions don't actually need a leading fence, even in the
- // SequentiallyConsistent case.
- AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
-
- // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
- // an ldrexd (A3.5.3).
- IRBuilder<> Builder(LI);
- Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
-
- insertTrailingFence(Builder, LI->getOrdering());
-
- LI->replaceAllUsesWith(Val);
- LI->eraseFromParent();
-
- return true;
-}
-
-bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
- // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
- // we need a loop and the entire instruction is essentially an "atomicrmw
- // xchg" that ignores the value loaded.
- IRBuilder<> Builder(SI);
- AtomicRMWInst *AI =
- Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
- SI->getValueOperand(), SI->getOrdering());
- SI->eraseFromParent();
-
- // Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
-}
-
-bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
- AtomicOrdering Order = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // fence?
- // atomicrmw.start:
- // %loaded = @load.linked(%addr)
- // %new = some_op iN %loaded, %incr
- // %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %atomicrmw.end
- // atomicrmw.end:
- // fence?
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
-
- Value *NewVal;
- switch (AI->getOperation()) {
- case AtomicRMWInst::Xchg:
- NewVal = AI->getValOperand();
- break;
- case AtomicRMWInst::Add:
- NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Sub:
- NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::And:
- NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Nand:
- NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
- "new");
- break;
- case AtomicRMWInst::Or:
- NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Xor:
- NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Max:
- NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::Min:
- NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMax:
- NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- case AtomicRMWInst::UMin:
- NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
- NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
- break;
- default:
- llvm_unreachable("Unknown atomic op");
- }
-
- Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
- StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
- Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- insertTrailingFence(Builder, Order);
-
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
-
- return true;
-}
-
-bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
- AtomicOrdering FailureOrder = CI->getFailureOrdering();
- Value *Addr = CI->getPointerOperand();
- BasicBlock *BB = CI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
- //
- // The full expansion we produce is:
- // [...]
- // fence?
- // cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.end/%cmpxchg.barrier
- // cmpxchg.trystore:
- // %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %cmpxchg.end
- // cmpxchg.barrier:
- // fence?
- // br label %cmpxchg.end
- // cmpxchg.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
-
- // This grabs the DebugLoc from CI
- IRBuilder<> Builder(CI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we might want a fence too. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
- Value *ShouldStore =
- Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
-
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
- // jump straight past that fence instruction (if it exists).
- BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
-
- Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess =
- storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
- StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
- Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
-
- // Finally, make sure later instructions don't get reordered with a fence if
- // necessary.
- Builder.SetInsertPoint(BarrierBB);
- insertTrailingFence(Builder, SuccessOrder);
- Builder.CreateBr(ExitBB);
-
- CI->replaceAllUsesWith(Loaded);
- CI->eraseFromParent();
-
- return true;
-}
-
-Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
- AtomicOrdering Ord) {
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
-
- // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
- // intrinsic must return {i32, i32} and we have to recombine them into a
- // single i64 here.
- if (ValTy->getPrimitiveSizeInBits() == 64) {
- Intrinsic::ID Int =
- IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
-
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
-
- Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
- Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
- Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
- Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
- return Builder.CreateOr(
- Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
- }
-
- Type *Tys[] = { Addr->getType() };
- Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
-
- return Builder.CreateTruncOrBitCast(
- Builder.CreateCall(Ldrex, Addr),
- cast<PointerType>(Addr->getType())->getElementType());
-}
-
-Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
- Value *Addr, AtomicOrdering Ord) {
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
-
- // Since the intrinsics must have legal type, the i64 intrinsics take two
- // parameters: "i32, i32". We must marshal Val into the appropriate form
- // before the call.
- if (Val->getType()->getPrimitiveSizeInBits() == 64) {
- Intrinsic::ID Int =
- IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
- Function *Strex = Intrinsic::getDeclaration(M, Int);
- Type *Int32Ty = Type::getInt32Ty(M->getContext());
-
- Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
- Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- return Builder.CreateCall3(Strex, Lo, Hi, Addr);
- }
-
- Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
- Type *Tys[] = { Addr->getType() };
- Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
-
- return Builder.CreateCall2(
- Strex, Builder.CreateZExtOrBitCast(
- Val, Strex->getFunctionType()->getParamType(0)),
- Addr);
-}
-
-AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
- return Ord;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- Builder.CreateFence(Release);
-
- // The exclusive operations don't need any barrier if we're adding separate
- // fences.
- return Monotonic;
-}
-
-void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
- return;
-
- if (Ord == Acquire || Ord == AcquireRelease)
- Builder.CreateFence(Acquire);
- else if (Ord == SequentiallyConsistent)
- Builder.CreateFence(SequentiallyConsistent);
-}
-
-bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
- // Loads and stores less than 64-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 64;
-
- // For the real atomic operations, we have ldrex/strex up to 64 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 64;
-}
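The deleted ARMAtomicExpandPass rewrote atomic IR operations into explicit load-linked/store-conditional retry loops built from the llvm.arm.ldrex/ldaex and strex/stlex intrinsics; its createARMAtomicExpandPass hook is removed from ARM.h above, presumably in favour of the equivalent target-independent pass picked up by this rebase. A minimal standalone sketch of the retry-loop structure it generated for an "atomicrmw add"; the loadLinked/storeConditional callables are hypothetical stand-ins for the intrinsics, and the main() below emulates them in a single-threaded way:

    #include <cstdint>
    #include <functional>
    #include <iostream>

    using LoadLinked       = std::function<uint32_t(uint32_t *)>;
    using StoreConditional = std::function<int(uint32_t, uint32_t *)>; // 0 = success

    uint32_t atomicRMWAdd(uint32_t *Addr, uint32_t Incr,
                          LoadLinked loadLinked, StoreConditional storeConditional) {
      uint32_t Loaded;
      for (;;) {
        Loaded = loadLinked(Addr);                // %loaded = @load.linked(%addr)
        uint32_t NewVal = Loaded + Incr;          // %new = add i32 %loaded, %incr
        if (storeConditional(NewVal, Addr) == 0)  // 0 means the exclusive store hit
          break;
        // Otherwise another observer intervened between ldrex and strex: retry.
      }
      return Loaded;                              // atomicrmw yields the old value
    }

    int main() {
      uint32_t Val = 40;
      uint32_t Old = atomicRMWAdd(&Val, 2,
                                  [](uint32_t *A) { return *A; },
                                  [](uint32_t V, uint32_t *A) { *A = V; return 0; });
      std::cout << Old << " -> " << Val << "\n";  // prints "40 -> 42"
    }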
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 47f5bf9..bc266e8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,11 +37,13 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-instrinfo"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
@@ -125,14 +127,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
- return NULL;
+ return nullptr;
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
- default: return NULL;
+ default: return nullptr;
case ARMII::IndexModePre:
isPre = true;
break;
@@ -144,10 +146,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// operation.
unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
if (MemOpc == 0)
- return NULL;
+ return nullptr;
- MachineInstr *UpdateMI = NULL;
- MachineInstr *MemMI = NULL;
+ MachineInstr *UpdateMI = nullptr;
+ MachineInstr *MemMI = nullptr;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
@@ -169,7 +171,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (ARM_AM::getSOImmVal(Amt) == -1)
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
- return NULL;
+ return nullptr;
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(Amt)
@@ -273,8 +275,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- TBB = 0;
- FBB = 0;
+ TBB = nullptr;
+ FBB = nullptr;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
@@ -331,7 +333,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
I->isReturn())) {
// Forget any previous condition branch information - it no longer applies.
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// If we can modify the function, delete everything below this
// unconditional branch.
@@ -405,7 +407,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
@@ -535,7 +537,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) {
+namespace llvm {
+template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
@@ -548,6 +551,7 @@ template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) {
// all definitions of CPSR are dead
return true;
}
+}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
@@ -620,7 +624,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != 0);
+ assert(MJTI != nullptr);
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
assert(JTI < JT.size());
// Thumb instructions are 2 byte aligned, but JT entries are 4 byte
@@ -1248,7 +1252,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
unsigned PCLabelId = AFI->createPICLabelUId();
- ARMConstantPoolValue *NewCPV = 0;
+ ARMConstantPoolValue *NewCPV = nullptr;
// FIXME: The below assumes PIC relocation model and that the function
// is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
@@ -1659,10 +1663,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return NULL;
+ return nullptr;
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
- return NULL;
+ return nullptr;
// After swapping the MOVCC operands, also invert the condition.
MI->getOperand(MI->findFirstPredOperandIdx())
.setImm(ARMCC::getOppositeCondition(CC));
@@ -1678,35 +1682,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
- return 0;
+ return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
- return 0;
+ return nullptr;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
- return 0;
+ return nullptr;
// MI is folded into the MOVCC by predicating it.
if (!MI->isPredicable())
- return 0;
+ return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
- return 0;
+ return nullptr;
if (!MO.isReg())
continue;
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
- return 0;
+ return nullptr;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- return 0;
+ return nullptr;
if (MO.isDef() && !MO.isDead())
- return 0;
+ return nullptr;
}
bool DontMoveAcrossStores = true;
- if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
- return 0;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
+ DontMoveAcrossStores))
+ return nullptr;
return MI;
}
@@ -1741,14 +1746,14 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
if (!DefMI)
- return 0;
+ return nullptr;
// Find new register class to use.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
unsigned DestReg = MI->getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
- return 0;
+ return nullptr;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
@@ -2254,7 +2259,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
- MI = 0;
+ MI = nullptr;
for (MachineRegisterInfo::use_instr_iterator
UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
UI != UE; ++UI) {
@@ -2281,17 +2286,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
- MI = NULL;
+ MI = nullptr;
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
- MI = NULL;
+ MI = nullptr;
else
return false;
}
@@ -3295,7 +3300,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
if (Idx == -1) {
Dist = 0;
- return 0;
+ return nullptr;
}
UseIdx = Idx;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 3ddddcb..4b3e740 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -261,7 +261,7 @@ private:
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
- unsigned *PredCost = 0) const override;
+ unsigned *PredCost = nullptr) const override;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 8130a2d..a2eee9f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -44,14 +44,18 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti),
- FramePtr((STI.isTargetMachO() || STI.isThumb()) ? ARM::R7 : ARM::R11),
- BasePtr(ARM::R6) {
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ if (STI.isTargetMachO())
+ FramePtr = ARM::R7;
+ else if (STI.isTargetWindows())
+ FramePtr = ARM::R11;
+ else // ARM EABI
+ FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
}
-const uint16_t*
+const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList
: CSR_AAPCS_SaveList;
@@ -107,7 +111,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
// should return NULL
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
- return NULL;
+ return nullptr;
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
}
@@ -173,7 +177,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
const TargetRegisterClass *
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &ARM::CCRRegClass)
- return 0; // Can't copy CCR registers.
+ return nullptr; // Can't copy CCR registers.
return RC;
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 66b3c82..91df565 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -100,8 +100,8 @@ protected:
public:
/// Code Generation virtual methods...
- const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
@@ -186,7 +186,7 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 4f94ad2..dc41c1c 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -28,7 +28,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -71,10 +71,10 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
- static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
- static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
- static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -123,8 +123,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
- static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
@@ -160,6 +160,105 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+ assert(PendingHAMembers.size() < 8);
+ if (PendingHAMembers.size() > 0)
+ assert(PendingHAMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // HA
+ PendingHAMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (ArgFlags.isInConsecutiveRegsLast()) {
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
+ "Homogeneous aggregates must have between 1 and 4 members");
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ const uint16_t *RegList;
+ unsigned NumRegs;
+ switch (LocVT.SimpleTy) {
+ case MVT::i32:
+ case MVT::f32:
+ RegList = SRegList;
+ NumRegs = 16;
+ break;
+ case MVT::f64:
+ RegList = DRegList;
+ NumRegs = 8;
+ break;
+ case MVT::v2f64:
+ RegList = QRegList;
+ NumRegs = 4;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for HA");
+ break;
+ }
+
+ unsigned RegResult =
+ State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
+ It != PendingHAMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingHAMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, fall back to the stack
+
+ // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
+ for (unsigned regNo = 0; regNo < 16; ++regNo)
+ State.AllocateReg(SRegList[regNo]);
+
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned Align = Size;
+
+ if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
+ // Vectors are always aligned to 8 bytes. If we've seen an i32 here
+ // it's because it's been split from a larger type, also with align 8.
+ Align = 8;
+ }
+
+ for (auto It : PendingHAMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+
+ // Only the first member needs to be aligned.
+ Align = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingHAMembers.clear();
+ }
+
+ // This will be allocated by the last member of the HA
+ return true;
+}
+
} // End llvm namespace
#endif
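The new CC_ARM_AAPCS_Custom_HA handler collects the members of a homogeneous floating-point or vector aggregate (up to four members of identical type), then tries to allocate one contiguous block of S/D/Q registers for the whole aggregate via AllocateRegBlock, falling back to the stack and marking every VFP argument register unavailable (AAPCS rule C.2.vfp) if no block fits. A rough standalone model of that block-allocation idea, assuming a plain 16-entry S-register file; this is a conceptual sketch, not the LLVM CCState API:

    #include <array>
    #include <cstdio>

    struct VFPRegFile {
      std::array<bool, 16> Used{}; // S0..S15, all initially free

      // Find NumMembers consecutive free registers; return the first index,
      // or -1 (and poison the whole file) when the aggregate goes to the stack.
      int allocateBlock(unsigned NumMembers) {
        for (unsigned First = 0; First + NumMembers <= Used.size(); ++First) {
          bool Free = true;
          for (unsigned I = 0; I < NumMembers; ++I)
            Free &= !Used[First + I];
          if (Free) {
            for (unsigned I = 0; I < NumMembers; ++I)
              Used[First + I] = true;
            return static_cast<int>(First);
          }
        }
        Used.fill(true); // rule C.2.vfp: later arguments may not use VFP regs
        return -1;
      }
    };

    int main() {
      VFPRegFile Regs;
      // struct HFA { float x, y, z; } is a 3-member HFA: S0-S2 in this model.
      std::printf("3-float HFA -> S%d\n", Regs.allocateBlock(3));
      // The next 4-member HFA takes the following contiguous block, S3-S6 here
      // (the real allocator also honours D/Q alignment for wider members).
      std::printf("4-float HFA -> S%d\n", Regs.allocateBlock(4));
    }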
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7cffd82..526089b 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // HFAs are passed in a contiguous block of registers, or on the stack
+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
+
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7359a11..2fd7edd 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMConstantPoolValue.h"
@@ -40,6 +39,8 @@
#endif
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
@@ -65,10 +66,10 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(nullptr),
II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
TD(tm.getDataLayout()), TM(tm),
- MCE(mce), MCPEs(0), MJTEs(0),
+ MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
/// getBinaryCodeForInstr - This function, generated by the
@@ -373,7 +374,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = 0;
+ MJTEs = nullptr;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index ba05171..ce264ee 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,6 +35,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "arm-cp-islands"
+
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -593,7 +594,7 @@ ARMConstantIslands::CPEntry
if (CPEs[i].CPEMI == CPEMI)
return &CPEs[i];
}
- return NULL;
+ return nullptr;
}
/// getCPELogAlign - Returns the required alignment of the constant pool entry
@@ -1102,7 +1103,7 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
removeDeadCPEMI(CPEMI);
- CPE->CPEMI = NULL;
+ CPE->CPEMI = nullptr;
--NumCPEs;
return true;
}
@@ -1135,7 +1136,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
U.NegOk)) {
@@ -1317,7 +1318,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
++MI;
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
+ MachineInstr *LastIT = nullptr;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
@@ -1491,7 +1492,7 @@ bool ARMConstantIslands::removeUnusedCPEntries() {
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = NULL;
+ CPEs[j].CPEMI = nullptr;
MadeChange = true;
}
}
@@ -1844,7 +1845,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
@@ -1970,7 +1971,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
bool MadeChange = false;
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
@@ -2012,7 +2013,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
MachineFunction::iterator BBi = BB;
@@ -2032,7 +2033,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Update numbering to account for the block being moved.
MF->RenumberBlocks();
++NumJTMoved;
- return NULL;
+ return nullptr;
}
// Create a new MBB for the code after the jump BB.
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index bd4ee44..6045738 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
@@ -23,6 +22,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
@@ -31,6 +31,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-pseudo"
+
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
cl::desc("Verify machine code after expanding ARM pseudos"));
@@ -345,7 +347,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
return I;
- return NULL;
+ return nullptr;
}
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
@@ -614,6 +616,39 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MI.eraseFromParent();
}
+static bool IsAnAddressOperand(const MachineOperand &MO) {
+ // This check is overly conservative. Unless we are certain that the machine
+ // operand is not a symbol reference, we return that it is a symbol reference.
+ // This is important as the load pair may not be split up on Windows.
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate:
+ return false;
+ case MachineOperand::MO_MachineBasicBlock:
+ return true;
+ case MachineOperand::MO_FrameIndex:
+ return false;
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return false;
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ return true;
+ case MachineOperand::MO_CFIIndex:
+ return false;
+ }
+ llvm_unreachable("unhandled machine operand type");
+}
+
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -624,10 +659,14 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
MachineInstrBuilder LO16, HI16;
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // FIXME Windows CE supports older ARM CPUs
+ assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
+
// Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
@@ -664,17 +703,29 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
- if (MO.isImm()) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();
unsigned Lo16 = Imm & 0xffff;
unsigned Hi16 = (Imm >> 16) & 0xffff;
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
- } else {
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *ES = MO.getSymbolName();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
+ HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
+ break;
+ }
+ default: {
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ break;
+ }
}
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -682,6 +733,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
+ if (RequiresBundling)
+ finalizeBundle(MBB, &*LO16, &*MBBI);
+
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
}
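
The new IsAnAddressOperand/finalizeBundle path above hinges on how ExpandMOV32BitImm splits a 32-bit value into a MOVW/MOVT pair. A minimal sketch of just that split (illustrative plain C++, not the MachineInstrBuilder code itself; the helper name is made up):

    #include <cstdint>

    // MOVW materializes the low 16 bits and zero-extends the register;
    // MOVT then writes the high 16 bits while preserving the low half.
    struct MovwMovt { uint16_t Lo16; uint16_t Hi16; };

    static MovwMovt splitImm32(uint32_t Imm) {
      return { static_cast<uint16_t>(Imm & 0xffff),
               static_cast<uint16_t>((Imm >> 16) & 0xffff) };
    }

For symbolic operands the same split is expressed with the MO_LO16/MO_HI16 target flags instead of immediates, and on Windows the resulting MOVW/MOVT pair is finalized into a bundle so later passes cannot separate the two halves of the address.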
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index c442444..6f8fb1a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -166,8 +166,6 @@ class ARMFastISel final : public FastISel {
// Utility routines.
private:
- unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum,
- unsigned Op);
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -191,6 +189,8 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
+ const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
+
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
@@ -283,23 +283,6 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
-unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
- unsigned Op, unsigned OpNum) {
- if (TargetRegisterInfo::isVirtualRegister(Op)) {
- const TargetRegisterClass *RegClass =
- TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
- if (!MRI.constrainRegClass(Op, RegClass)) {
- // If it's not legal to COPY between the register classes, something
- // has gone very wrong before we got here.
- unsigned NewOp = createResultReg(RegClass);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
- return NewOp;
- }
- }
- return Op;
-}
-
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
@@ -769,7 +752,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
// Some boilerplate from the X86 FastISel.
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
@@ -1400,7 +1383,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
const APInt &CIVal = ConstInt->getValue();
Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
// For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
- // then a cmn, because there is no way to represent 2147483648 as a
+ // than a cmn, because there is no way to represent 2147483648 as a
// signed 32-bit int.
if (Imm < 0 && Imm != (int)0x80000000) {
isNegativeImm = true;
@@ -2182,7 +2165,8 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
if (!LCREVT.isSimple()) return 0;
GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
- GlobalValue::ExternalLinkage, 0, Name);
+ GlobalValue::ExternalLinkage, nullptr,
+ Name);
assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
@@ -2286,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
}
bool ARMFastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = 0) {
+ const char *IntrMemName = nullptr) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
@@ -3092,6 +3076,6 @@ namespace llvm {
TM.Options.NoFramePointerElim = true;
return new ARMFastISel(funcInfo, libInfo);
}
- return 0;
+ return nullptr;
}
}
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index a30f4cd..e191a3c 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -1,4 +1,4 @@
-//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===//
+//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,11 +16,11 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
+namespace llvm {
+
template<typename InstrType> // could be MachineInstr or MCInst
bool IsCPSRDead(InstrType *Instr);
-namespace llvm {
-
template<typename InstrType> // could be MachineInstr or MCInst
inline bool isV8EligibleForIT(InstrType *Instr) {
switch (Instr->getOpcode()) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 36ecfca..0caf4bf 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -87,7 +87,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
- const uint16_t *CSRegs) {
+ const MCPhysReg *CSRegs) {
// Integer spill area is handled with "pop".
if (isPopOpcode(MI->getOpcode())) {
// The first two operands are predicates. The last two are
@@ -142,6 +142,14 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) {
return count;
}
+static bool WindowsRequiresStackProbe(const MachineFunction &MF,
+ size_t StackSizeInBytes) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->getStackProtectorIndex() > 0)
+ return StackSizeInBytes >= 4080;
+ return StackSizeInBytes >= 4096;
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -149,15 +157,16 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
+ const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = TM.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -187,7 +196,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
- if (!AFI->hasStackFrame()) {
+ if (!AFI->hasStackFrame() &&
+ (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
if (NumBytes - ArgRegsSaveSize != 0) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
@@ -284,6 +294,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
} else
NumBytes = DPRCSOffset;
+ if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
+ uint32_t NumWords = NumBytes >> 2;
+
+ if (NumWords < 65536)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup));
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
+ .addExternalSymbol("__chkstk")
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)));
+ NumBytes = 0;
+ }
+
unsigned adjustedGPRCS1Size = GPRCS1Size;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
@@ -316,10 +371,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator Pos = ++GPRCS1Push;
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -382,10 +436,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -411,7 +464,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
do {
MachineBasicBlock::iterator Push = DPRCSPush++;
if (!HasFP) {
- CFAOffset -= sizeOfSPAdjustment(Push);;
+ CFAOffset -= sizeOfSPAdjustment(Push);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -419,10 +472,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
} while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD);
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
@@ -540,7 +592,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
do {
--MBBI;
@@ -1205,12 +1257,9 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
const ARMBaseInstrInfo &TII) {
unsigned FnSize = 0;
- for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
- I != E; ++I)
- FnSize += TII.GetInstSizeInBytes(I);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB)
+ FnSize += TII.GetInstSizeInBytes(&MI);
}
return FnSize;
}
@@ -1223,21 +1272,21 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
const TargetFrameLowering *TFI) {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
- for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!I->getOperand(i).isFI()) continue;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
// When using ADDri to get the address of a stack object, 255 is the
// largest offset guaranteed to fit in the immediate offset.
- if (I->getOpcode() == ARM::ADDri) {
+ if (MI.getOpcode() == ARM::ADDri) {
Limit = std::min(Limit, (1U << 8) - 1);
break;
}
// Otherwise check the addressing mode.
- switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
Limit = std::min(Limit, (1U << 8) - 1);
@@ -1374,7 +1423,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -1486,6 +1535,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (hasFP(MF)) {
MRI.setPhysRegUsed(FramePtr);
+ auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ FramePtr);
+ if (FPPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(FPPos);
NumGPRSpills++;
}
@@ -1681,7 +1734,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
- report_fatal_error("Segmented stacks not supported on this platfrom.");
+ report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1693,6 +1746,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Do not generate a prologue for functions with a stack of size zero
+ if (StackSize == 0)
+ return;
+
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
unsigned ScratchReg0 = ARM::R4;
@@ -1722,8 +1781,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.push_front(PrevStackMBB);
// The required stack size that is aligned to ARM constant criterion.
- uint64_t StackSize = MFI->getStackSize();
-
AlignedStackSize = alignToARMConstant(StackSize);
// When the frame size is less than 256 we just compare the stack
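
The Windows stack-probe handling added above boils down to a page-size threshold plus a __chkstk call. A hedged sketch of just the threshold decision (the helper name and bool parameter are illustrative; the 4080/4096 constants mirror WindowsRequiresStackProbe in the hunk):

    #include <cstddef>

    // Frames of a page (4096 bytes) or more must be probed so the guard page
    // is touched before SP moves past it; with a stack-protector slot present
    // the limit drops to 4080 bytes.
    static bool needsWindowsStackProbe(size_t StackSizeInBytes,
                                       bool HasStackProtector) {
      return StackSizeInBytes >= (HasStackProtector ? 4080u : 4096u);
    }

In the emitted prologue the probe size is passed to __chkstk as a word count in R4 (NumBytes >> 2) and the following t2SUBrr drops SP by R4, which implies __chkstk scales R4 back to a byte count before returning.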
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 524ee36..981d320 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -57,7 +57,7 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const override;
- void adjustForSegmentedStacks(MachineFunction &MF) const;
+ void adjustForSegmentedStacks(MachineFunction &MF) const override;
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 61d4e12..0885c4e 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -77,7 +77,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
void ARMHazardRecognizer::Reset() {
- LastMI = 0;
+ LastMI = nullptr;
FpMLxStalls = 0;
ScoreboardHazardRecognizer::Reset();
}
@@ -95,7 +95,7 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
void ARMHazardRecognizer::AdvanceCycle() {
if (FpMLxStalls && --FpMLxStalls == 0)
// Stalled for 4 cycles but still can't schedule any other instructions.
- LastMI = 0;
+ LastMI = nullptr;
ScoreboardHazardRecognizer::AdvanceCycle();
}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index e88cd0d..a8198e2 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -35,7 +35,7 @@ public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG)
: ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
- LastMI(0) {}
+ LastMI(nullptr) {}
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 70e11c5..08d598d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
@@ -37,6 +36,8 @@
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
cl::desc("Disable isel of shifter-op"),
@@ -72,6 +73,13 @@ public:
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
const char *getPassName() const override {
return "ARM Instruction Selection";
}
@@ -397,7 +405,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
- }
+ }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
@@ -1440,7 +1448,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return NULL;
+ return nullptr;
EVT LoadedVT = LD->getMemoryVT();
SDValue Offset, AMOpc;
@@ -1506,14 +1514,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
}
}
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return NULL;
+ return nullptr;
EVT LoadedVT = LD->getMemoryVT();
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1540,7 +1548,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
break;
default:
- return NULL;
+ return nullptr;
}
Match = true;
}
@@ -1554,7 +1562,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
MVT::Other, Ops);
}
- return NULL;
+ return nullptr;
}
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
@@ -1699,10 +1707,10 @@ static bool isVSTfixed(unsigned Opc)
case ARM::VST1d16wb_fixed : return true;
case ARM::VST1d32wb_fixed : return true;
case ARM::VST1d64wb_fixed : return true;
- case ARM::VST1q8wb_fixed : return true;
- case ARM::VST1q16wb_fixed : return true;
- case ARM::VST1q32wb_fixed : return true;
- case ARM::VST1q64wb_fixed : return true;
+ case ARM::VST1q8wb_fixed : return true;
+ case ARM::VST1q16wb_fixed : return true;
+ case ARM::VST1q32wb_fixed : return true;
+ case ARM::VST1q64wb_fixed : return true;
case ARM::VST1d64TPseudoWB_fixed : return true;
case ARM::VST1d64QPseudoWB_fixed : return true;
case ARM::VST2d8wb_fixed : return true;
@@ -1776,7 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -1895,7 +1903,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
@@ -1909,7 +1917,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2055,7 +2063,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2160,7 +2168,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
@@ -2171,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2243,7 +2251,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
@@ -2282,7 +2290,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
- return NULL;
+ return nullptr;
unsigned Opc = isSigned
? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
@@ -2295,7 +2303,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// The immediate is a mask of the low bits iff imm & (imm+1) == 0
if (And_imm & (And_imm + 1))
- return NULL;
+ return nullptr;
unsigned Srl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
@@ -2315,7 +2323,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
// ARM models shift instructions as MOVsi with shifter operand.
@@ -2325,17 +2333,17 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
}
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
- getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
- return NULL;
+ return nullptr;
}
// Otherwise, we're looking for a shift of a shift
@@ -2349,16 +2357,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
- return NULL;
+ return nullptr;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
- return NULL;
+ return nullptr;
}
/// Target-specific DAG combining for ISD::XOR.
@@ -2377,10 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
- return NULL;
+ return nullptr;
if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
- return NULL;
+ return nullptr;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
SDValue ADDSrc1 = XORSrc0.getOperand(1);
@@ -2391,13 +2399,13 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
unsigned Size = XType.getSizeInBits() - 1;
if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
- XType.isInteger() && SRAConstant != NULL &&
+ XType.isInteger() && SRAConstant != nullptr &&
Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
@@ -2414,7 +2422,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (N->getOpcode()) {
@@ -2478,7 +2486,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
- return NULL;
+ return nullptr;
}
// Other cases are autogenerated.
@@ -2492,14 +2500,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
case ISD::SRL:
@@ -2526,10 +2534,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
@@ -2542,10 +2550,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
}
}
}
@@ -2660,7 +2668,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
}
case ISD::LOAD: {
- SDNode *ResNode = 0;
+ SDNode *ResNode = nullptr;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
ResNode = SelectT2IndexedLoad(N);
else
@@ -2707,13 +2715,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
- return NULL;
+ return nullptr;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
@@ -2733,7 +2741,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
@@ -2753,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
@@ -2834,7 +2842,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
- return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VLD2_UPD: {
@@ -2845,7 +2853,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
- return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VLD3_UPD: {
@@ -2912,7 +2920,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
- return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VST2_UPD: {
@@ -2923,7 +2931,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
- return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VST3_UPD: {
@@ -3047,7 +3055,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(SDValue(N, 1), Result);
}
ReplaceUses(SDValue(N, 2), OutChain);
- return NULL;
+ return nullptr;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd: {
@@ -3093,7 +3101,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1d32, ARM::VLD1d64 };
static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
- return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vld2: {
@@ -3101,7 +3109,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2d32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
- return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vld3: {
@@ -3164,7 +3172,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1d32, ARM::VST1d64 };
static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
- return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vst2: {
@@ -3172,7 +3180,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST2d32, ARM::VST1q64 };
static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
- return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vst3: {
@@ -3306,7 +3314,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
// them into a GPRPair.
SDLoc dl(N);
- SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
@@ -3388,7 +3397,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
// Update the original glue user.
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
- CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(GU, Ops);
GU = T1.getNode();
}
else {
@@ -3435,11 +3444,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (Glue.getNode())
AsmNodeOperands.push_back(Glue);
if (!Changed)
- return NULL;
+ return nullptr;
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
- CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
- AsmNodeOperands.size());
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
return New.getNode();
}
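
Several of the SelectNodeTo/UpdateNodeOperands/getNode calls above drop their trailing operand count because the SelectionDAG interfaces now accept an ArrayRef, which deduces the length from a C array or container. A minimal illustration of why the explicit count becomes redundant (standalone sketch; countOf is a made-up name):

    #include <cstddef>
    #include "llvm/ADT/ArrayRef.h"

    // Binding a fixed-size array to ArrayRef keeps both the pointer and the
    // element count, so callees no longer need the size passed separately.
    template <typename T, size_t N>
    static size_t countOf(const T (&Arr)[N]) {
      llvm::ArrayRef<T> Ref(Arr);
      return Ref.size();   // == N
    }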
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2ebad8e..00d07e8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
@@ -37,18 +36,22 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
@@ -79,7 +82,7 @@ namespace {
}
// The APCS parameter registers.
-static const uint16_t GPRArgRegs[] = {
+static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -155,7 +158,8 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
return new TargetLoweringObjectFileMachO();
-
+ if (TM.getSubtarget<ARMSubtarget>().isTargetWindows())
+ return new TargetLoweringObjectFileCOFF();
return new ARMElfTargetObjectFile();
}
@@ -170,7 +174,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
- Subtarget->hasARMOps()) {
+ Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -246,173 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, 0);
- setLibcallName(RTLIB::SRL_I128, 0);
- setLibcallName(RTLIB::SRA_I128, 0);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
!Subtarget->isTargetWindows()) {
- // Double-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 2
- setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
- setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
- setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
- setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
- setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
-
- // Double-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 3
- setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 4
- setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
- setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
- setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
- setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
- setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 5
- setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
-
- // Floating-point to integer conversions.
- // RTABI chapter 4.1.2, Table 6
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
-
- // Conversions between floating types.
- // RTABI chapter 4.1.2, Table 7
- setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
- setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
-
- // Integer to floating-point conversions.
- // RTABI chapter 4.1.2, Table 8
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
-
- // Long long helper functions
- // RTABI chapter 4.2, Table 9
- setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
- setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
- setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
- setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
-
- // Integer division functions
- // RTABI chapter 4.3.1
- setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
- setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
-
- // Memory operations
- // RTABI chapter 4.3.4
- setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
- setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
- setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
- setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
+
+ if (Subtarget->isTargetWindows()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
@@ -444,6 +409,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
@@ -631,6 +603,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
@@ -850,7 +827,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
-
+
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
@@ -913,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const{
- const TargetRegisterClass *RRC = 0;
+ const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
@@ -950,7 +927,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
@@ -1204,40 +1181,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
#include "ARMGenCallingConv.inc"
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
- bool Return,
- bool isVarArg) const {
+/// getEffectiveCallingConv - Get the effective calling convention, taking into
+/// account presence of floating point hardware and calling convention
+/// limitations, such as support for variadic functions.
+CallingConv::ID
+ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
- if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
- // For AAPCS ABI targets, just use VFP variant of the calling convention.
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- }
- // Fallthrough
- case CallingConv::C: {
- // Use target triple & subtarget features to do actual dispatch.
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ case CallingConv::GHC:
+ return CC;
+ case CallingConv::ARM_AAPCS_VFP:
+ return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
+ case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ case CallingConv::Fast:
+ if (!Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::Fast;
+ return CallingConv::ARM_APCS;
+ } else if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
}
- case CallingConv::ARM_AAPCS_VFP:
- if (!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- // Fallthrough
- case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (getEffectiveCallingConv(CC, isVarArg)) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::Fast:
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
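
[Annotation] The hunk above splits calling-convention selection into two steps: getEffectiveCallingConv folds subtarget facts (APCS vs AAPCS, VFP availability, hard-float ABI, variadic-ness) into one concrete ARM convention, and CCAssignFnForNode then just maps that concrete convention to an assignment table. A minimal standalone sketch of the same two-step dispatch, with simplified enums standing in for the LLVM types (names here are illustrative, not LLVM API):

    #include <cstdio>

    // Simplified stand-ins for the LLVM types involved (illustrative only).
    enum class CC { C, Fast, ARM_APCS, ARM_AAPCS, ARM_AAPCS_VFP, GHC };
    struct Subtarget { bool isAAPCS_ABI; bool hasVFP2; bool hardFloatABI; };

    // Step 1: fold ABI, FP hardware and variadic-ness into a concrete convention.
    CC getEffectiveCC(CC cc, bool isVarArg, const Subtarget &ST) {
      switch (cc) {
      case CC::ARM_APCS: case CC::ARM_AAPCS: case CC::GHC:
        return cc;                                   // already concrete
      case CC::ARM_AAPCS_VFP:                        // VFP regs cannot carry varargs
        return isVarArg ? CC::ARM_AAPCS : CC::ARM_AAPCS_VFP;
      case CC::C:
        if (!ST.isAAPCS_ABI) return CC::ARM_APCS;
        if (ST.hasVFP2 && ST.hardFloatABI && !isVarArg) return CC::ARM_AAPCS_VFP;
        return CC::ARM_AAPCS;
      case CC::Fast:
        if (!ST.isAAPCS_ABI)
          return (ST.hasVFP2 && !isVarArg) ? CC::Fast : CC::ARM_APCS;
        return (ST.hasVFP2 && !isVarArg) ? CC::ARM_AAPCS_VFP : CC::ARM_AAPCS;
      }
      return cc;
    }

    // Step 2: pick the assignment table name for the concrete convention.
    const char *assignFnFor(CC cc, bool isVarArg, const Subtarget &ST, bool Ret) {
      switch (getEffectiveCC(cc, isVarArg, ST)) {
      case CC::ARM_APCS:      return Ret ? "RetCC_ARM_APCS"      : "CC_ARM_APCS";
      case CC::ARM_AAPCS:     return Ret ? "RetCC_ARM_AAPCS"     : "CC_ARM_AAPCS";
      case CC::ARM_AAPCS_VFP: return Ret ? "RetCC_ARM_AAPCS_VFP" : "CC_ARM_AAPCS_VFP";
      case CC::Fast:          return Ret ? "RetFastCC_ARM_APCS"  : "FastCC_ARM_APCS";
      case CC::GHC:           return Ret ? "RetCC_ARM_APCS"      : "CC_ARM_APCS_GHC";
      default:                return "unsupported";
      }
    }

    int main() {
      Subtarget HardFP{true, true, true};
      std::printf("%s\n", assignFnFor(CC::C, /*isVarArg=*/false, HardFP, false));
      std::printf("%s\n", assignFnFor(CC::C, /*isVarArg=*/true,  HardFP, false));
    }

On a hard-float AAPCS subtarget the same C call uses the VFP table when it is not variadic and falls back to the base AAPCS table when it is, which is exactly the distinction the refactored hook makes reusable for the homogeneous-aggregate check added later in this patch.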
@@ -1286,6 +1281,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
@@ -1301,6 +1298,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
@@ -1351,16 +1350,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+ unsigned id = Subtarget->isLittle() ? 0 : 1;
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
Flags));
}
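
[Annotation] PassF64ArgInRegs now picks which VMOVRRD result goes into the first register of the pair based on endianness. A small standalone sketch of that ordering, splitting an IEEE double into two 32-bit words the way a GPR pair would receive them (plain C++, not the SelectionDAG code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <utility>

    // Split a double into the two 32-bit words handed to a GPR pair {first, second}.
    // VMOVRRD always produces (low word, high word); on a big-endian target the
    // AAPCS-visible order is reversed, which is what the swap in the patch models.
    std::pair<uint32_t, uint32_t> splitF64(double d, bool isLittle) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      uint32_t lo = static_cast<uint32_t>(bits);        // getValue(0) of VMOVRRD
      uint32_t hi = static_cast<uint32_t>(bits >> 32);  // getValue(1) of VMOVRRD
      if (!isLittle)
        std::swap(lo, hi);      // big-endian: high word goes in the first register
      return {lo, hi};          // {first GPR, second GPR or stack slot}
    }

    int main() {
      auto le = splitF64(1.0, true);
      auto be = splitF64(1.0, false);
      std::printf("LE: r0=%08x r1=%08x\n", le.first, le.second);
      std::printf("BE: r0=%08x r1=%08x\n", be.first, be.second);
    }

The same swap shows up in LowerCallResult, LowerReturn and GetF64FormalArgument in the hunks nearby; they are all the call-side, return-side and callee-side views of this one ordering rule.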
@@ -1398,6 +1398,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
@@ -1542,7 +1545,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
- Ops, array_lengthof(Ops)));
+ Ops));
}
} else if (!isSibCall) {
assert(VA.isMemLoc());
@@ -1553,8 +1556,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -1741,10 +1743,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall)
- return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -2049,8 +2051,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
- return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
SDValue
@@ -2074,6 +2075,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ bool isLittleEndian = Subtarget->isLittle();
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -2100,12 +2102,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(1), Flag);
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
@@ -2117,12 +2122,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 1 : 0),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
@@ -2151,8 +2159,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2314,13 +2321,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
+
// FIXME: is there useful debug info available here?
- TargetLowering::CallLoweringInfo CLI(Chain,
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
@@ -2466,6 +2473,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
return Result;
}
+SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
+ assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ EVT PtrVT = getPointerTy();
+ SDLoc DL(Op);
+
+ ++NumMovwMovt;
+
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+}
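
[Annotation] The new Windows path asserts that movw/movt is available because a COFF global address is materialized as an immediate pair rather than a literal-pool load. A minimal sketch of how such a pair rebuilds a 32-bit address (the address and split shown here are purely illustrative; relocation of the two halves is the assembler/linker's job and is not modeled):

    #include <cstdint>
    #include <cstdio>

    // movw rd, #lo16  ; writes the low half and zeroes the upper half
    // movt rd, #hi16  ; writes the upper half and keeps the low half
    uint32_t materialize(uint16_t lo16, uint16_t hi16) {
      uint32_t rd = lo16;                                  // movw
      rd = (rd & 0x0000FFFFu) | (uint32_t(hi16) << 16);    // movt
      return rd;
    }

    int main() {
      uint32_t addr = 0x00402A10;                          // hypothetical image address
      std::printf("%08x\n", materialize(addr & 0xFFFF, addr >> 16));
    }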
+
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
@@ -2654,7 +2678,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
-
+ if (!Subtarget->isLittle())
+ std::swap (ArgValue, ArgValue2);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
@@ -2803,8 +2828,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return FrameIndex;
} else {
if (ArgSize == 0) {
@@ -2834,8 +2858,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// If there is no regs to be stored, just point address after last
// argument passed via stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
+ 0, TotalArgRegsSaveSize);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3166,11 +3191,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
+std::pair<SDValue, SDValue>
+ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
+ SDValue &ARMcc) const {
+ assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
+
+ SDValue Value, OverflowCmp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+
+ // FIXME: We are currently always generating CMPs because we don't support
+ // generating CMN through the backend. This is not as good as the natural
+ // CMP case because it causes a register dependency and cannot be folded
+ // later.
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::SADDO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::UADDO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::SSUBO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ case ISD::USUBO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ } // switch (...)
+
+ return std::make_pair(Value, OverflowCmp);
+}
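
[Annotation] getARMXALUOOp turns each overflow node into a plain ADD/SUB plus a CMP whose condition code (VC for the signed forms, HS for the unsigned forms) reports whether overflow occurred. For reference, this is what the four nodes are required to compute; a standalone sketch using compiler builtins instead of flags (assumes the GCC/Clang __builtin_*_overflow intrinsics):

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Semantics of ISD::SADDO / UADDO / SSUBO / USUBO on i32: the wrapped result
    // plus an overflow bit.  The ARM lowering above produces the same pair as
    // {ADD or SUB, CMP + condition code} and materializes the bit with CMOV.
    std::pair<int32_t, bool> saddo(int32_t a, int32_t b) {
      int32_t r; bool ov = __builtin_add_overflow(a, b, &r); return {r, ov};
    }
    std::pair<uint32_t, bool> uaddo(uint32_t a, uint32_t b) {
      uint32_t r = a + b; return {r, r < a};      // carry out == result < LHS
    }
    std::pair<int32_t, bool> ssubo(int32_t a, int32_t b) {
      int32_t r; bool ov = __builtin_sub_overflow(a, b, &r); return {r, ov};
    }
    std::pair<uint32_t, bool> usubo(uint32_t a, uint32_t b) {
      return {a - b, a < b};                      // borrow == LHS < RHS
    }

    int main() {
      auto p = uaddo(0xFFFFFFFFu, 1u);
      std::printf("value=%u overflow=%d\n", p.first, (int)p.second);
    }

The LowerSELECT change a few lines further down reuses the same {Value, OverflowCmp} pair so that "select (overflow bit), a, b" becomes a single predicated move instead of first materializing 0/1.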
+
+
+SDValue
+ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
+ ARMcc, CCR, OverflowCmp);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+}
+
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
SDLoc dl(Op);
+ unsigned Opc = Cond.getOpcode();
+
+ if (Cond.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO)) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
+ ARMcc, CCR, OverflowCmp);
+
+ }
// Convert:
//
@@ -3472,7 +3582,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
@@ -3512,11 +3622,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
- Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
@@ -3713,7 +3823,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp1, 1).getValue(1);
+ Tmp1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
@@ -3729,7 +3839,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
+ Tmp0);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
@@ -3761,14 +3871,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ const ARMBaseRegisterInfo &ARI =
+ *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
- ? ARM::R7 : ARM::R11;
+ unsigned FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
@@ -3777,6 +3889,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("sp", ARM::SP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -3806,8 +3930,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
- SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ SDValue Cvt;
+ if (TLI.isBigEndian() && SrcVT.isVector() &&
+ SrcVT.getVectorNumElements() > 1)
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
+ else
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Op);
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
@@ -3863,7 +3994,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
@@ -3897,7 +4028,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
@@ -4102,7 +4233,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
@@ -4859,7 +4990,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, MVT::i32));
- N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
return N;
@@ -4870,7 +5001,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -4906,7 +5037,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
@@ -5213,12 +5344,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
if (V2.getNode()->getOpcode() == ISD::UNDEF)
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
@@ -5371,7 +5500,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
MVT::i32)));
}
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
@@ -5608,7 +5737,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -5946,12 +6075,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C, /*isTaillCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed*/false,
- Callee, Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
+ &Args, 0)
+ .setDiscardResult();
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
@@ -5998,8 +6127,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
};
Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
- array_lengthof(Ops));
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
OutChain = Cycles32.getValue(1);
} else {
// Intrinsic is defined to return 0 on unsupported platforms. Technically
@@ -6022,8 +6150,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
- LowerGlobalAddressELF(Op, DAG);
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -6068,6 +6203,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ return LowerXALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -6558,7 +6698,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
@@ -6755,8 +6895,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
- const TargetRegisterClass *TRC = 0;
- const TargetRegisterClass *VecTRC = 0;
+ const TargetRegisterClass *TRC = nullptr;
+ const TargetRegisterClass *VecTRC = nullptr;
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
@@ -6790,7 +6930,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
? (const TargetRegisterClass *)&ARM::DPairRegClass
: UnitSize == 8
? (const TargetRegisterClass *)&ARM::DPRRegClass
- : 0;
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7520,8 +7660,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
- widenType, &Ops[0], Ops.size());
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
}
@@ -7581,7 +7720,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// Look for the glued ADDE.
SDNode* AddeNode = AddcNode->getGluedUser();
- if (AddeNode == NULL)
+ if (!AddeNode)
return SDValue();
// Make sure it is really an ADDE.
@@ -7616,9 +7755,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// Figure out the high and low input values to the MLAL node.
SDValue* HiMul = &MULOp;
- SDValue* HiAdd = NULL;
- SDValue* LoMul = NULL;
- SDValue* LowAdd = NULL;
+ SDValue* HiAdd = nullptr;
+ SDValue* LoMul = nullptr;
+ SDValue* LowAdd = nullptr;
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
@@ -7635,7 +7774,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
LowAdd = &AddcOp0;
}
- if (LoMul == NULL)
+ if (!LoMul)
return SDValue();
if (LoMul->getNode() != HiMul->getNode())
@@ -7652,8 +7791,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
Ops.push_back(*HiAdd);
SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
- DAG.getVTList(MVT::i32, MVT::i32),
- &Ops[0], Ops.size());
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADDs' nodes uses by the MLA node's values.
SDValue HiMLALResult(MLALNode.getNode(), 1);
@@ -8290,8 +8428,7 @@ static SDValue PerformSTORECombine(SDNode *N,
Increment);
Chains.push_back(Ch);
}
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
- Chains.size());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
if (!ISD::isNormalStore(St))
@@ -8302,16 +8439,18 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(0), BasePtr,
- St->getPointerInfo(), St->isVolatile(),
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
std::min(4U, St->getAlignment() / 2));
@@ -8387,7 +8526,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -8470,7 +8609,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// Fold obvious case.
V = V.getOperand(0);
else {
- V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
@@ -8666,7 +8805,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
@@ -8676,8 +8815,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops.data(), Ops.size(),
- MemInt->getMemoryVT(),
+ Ops, MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
@@ -8746,11 +8884,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
- Ops, 2, VLDMemInt->getMemoryVT(),
+ Ops, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
@@ -9348,7 +9486,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
+ DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -9935,11 +10073,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return true;
}
-void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
unsigned BitWidth = KnownOne.getBitWidth();
KnownZero = KnownOne = APInt(BitWidth, 0);
switch (Op.getOpcode()) {
@@ -9955,11 +10093,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
@@ -10053,7 +10191,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
@@ -10132,7 +10270,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1) return;
@@ -10331,13 +10469,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
SDLoc dl(Op);
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
- 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
@@ -10494,3 +10631,160 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return false;
return true;
}
+
+bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+ // Loads and stores less than 64-bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+ // For the real atomic operations, we have ldrex/strex up to 64 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
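
[Annotation] A compact restatement of the decision this hook encodes, as plain C++ rather than LLVM code: 64-bit atomic loads and stores get the IR-level ldrex/strex expansion, smaller ones are left alone because they are already atomic, and all read-modify-write or cmpxchg operations up to 64 bits are expanded (anything wider falls back to the default libcalls).

    #include <cstdio>

    enum class AtomicKind { Load, Store, RMWOrCmpXchg };

    // Mirror of the predicate above (illustrative restatement, not LLVM code).
    bool shouldExpandInIR(AtomicKind k, unsigned bits) {
      switch (k) {
      case AtomicKind::Load:
      case AtomicKind::Store:
        return bits == 64;     // < 64 bits is naturally atomic; > 64 uses libcalls
      case AtomicKind::RMWOrCmpXchg:
        return bits <= 64;     // ldrex/strex cover up to doubleword operations
      }
      return false;
    }

    int main() {
      std::printf("i32 store: %d\n", shouldExpandInIR(AtomicKind::Store, 32));
      std::printf("i64 store: %d\n", shouldExpandInIR(AtomicKind::Store, 64));
      std::printf("i64 atomicrmw: %d\n", shouldExpandInIR(AtomicKind::RMWOrCmpXchg, 64));
    }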
+
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
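
[Annotation] For 64-bit accesses the ldrexd/ldaexd intrinsic hands back two i32 halves that emitLoadLinked re-assembles into one i64, swapping the halves on big-endian subtargets. A sketch of just the recombination arithmetic (plain C++, not the IRBuilder calls):

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Rebuild the i64 from the {first, second} register pair an ldrexd-style
    // primitive returns.  On a big-endian subtarget the first register holds
    // the high word, hence the swap before widening (mirrors the patch).
    uint64_t recombine(uint32_t first, uint32_t second, bool isLittle) {
      uint32_t lo = first, hi = second;
      if (!isLittle)
        std::swap(lo, hi);
      return uint64_t(lo) | (uint64_t(hi) << 32);   // zext both, shift, or
    }

    int main() {
      std::printf("%016llx\n",
                  (unsigned long long)recombine(0x89ABCDEFu, 0x01234567u, true));
    }

emitStoreConditional in the next hunk performs the inverse marshaling, truncating the i64 into the two i32 operands of strexd/stlexd with the same endian-dependent swap.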
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}
+
+enum HABaseType {
+ HA_UNKNOWN = 0,
+ HA_FLOAT,
+ HA_DOUBLE,
+ HA_VECT64,
+ HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+ uint64_t &Members) {
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+ return false;
+ Members += SubMembers;
+ }
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+ return false;
+ Members += SubMembers * AT->getNumElements();
+ } else if (Ty->isFloatTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+ return false;
+ Members = 1;
+ Base = HA_FLOAT;
+ } else if (Ty->isDoubleTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+ return false;
+ Members = 1;
+ Base = HA_DOUBLE;
+ } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ Members = 1;
+ switch (Base) {
+ case HA_FLOAT:
+ case HA_DOUBLE:
+ return false;
+ case HA_VECT64:
+ return VT->getBitWidth() == 64;
+ case HA_VECT128:
+ return VT->getBitWidth() == 128;
+ case HA_UNKNOWN:
+ switch (VT->getBitWidth()) {
+ case 64:
+ Base = HA_VECT64;
+ return true;
+ case 128:
+ Base = HA_VECT128;
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ return (Members > 0 && Members <= 4);
+}
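
[Annotation] A toy model of the homogeneous-aggregate walk above, useful for checking intuition about which types qualify: every leaf must share a single base type and the flattened element count must be between 1 and 4. This is a simplified stand-in (floats and doubles only, no vectors, no LLVM type system):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum class Base { Unknown, Float, Double };

    struct Ty {
      bool isFloat;
      bool isDouble;
      std::vector<Ty> members;        // struct fields or flattened array elements
    };

    static bool isHA(const Ty &T, Base &B, uint64_t &Members) {
      if (T.isFloat || T.isDouble) {
        Base Leaf = T.isFloat ? Base::Float : Base::Double;
        if (B != Base::Unknown && B != Leaf)
          return false;               // mixed float/double is not homogeneous
        B = Leaf;
        Members += 1;
        return true;
      }
      for (const Ty &Elt : T.members)
        if (!isHA(Elt, B, Members))
          return false;
      return true;
    }

    static bool isHA(const Ty &T) {
      Base B = Base::Unknown;
      uint64_t Members = 0;
      return isHA(T, B, Members) && Members > 0 && Members <= 4;
    }

    int main() {
      Ty f{true, false, {}}, d{false, true, {}};
      Ty fourFloats{false, false, {f, f, f, f}};     // HA: fits s0-s3
      Ty fiveFloats{false, false, {f, f, f, f, f}};  // too many members
      Ty mixed{false, false, {f, d}};                // mixed base types
      std::printf("4 x float: %d\n", isHA(fourFloats));
      std::printf("5 x float: %d\n", isHA(fiveFloats));
      std::printf("float+double: %d\n", isHA(mixed));
    }

functionArgumentNeedsConsecutiveRegisters below only reports true when the effective calling convention is ARM_AAPCS_VFP and the type passes this check, which is what forces an HA argument into a contiguous block of VFP registers.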
+
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+ CallingConv::ARM_AAPCS_VFP)
+ return false;
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool result = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ return result;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index f33e6db..c15305c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -313,10 +313,10 @@ namespace llvm {
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
- void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
bool ExpandInlineAsm(CallInst *CI) const override;
@@ -384,6 +384,18 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+
+ bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(MVT VT) const override;
@@ -404,6 +416,7 @@ namespace llvm {
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
+ std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
@@ -417,6 +430,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
SDLoc dl) const;
+ CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
@@ -430,6 +445,7 @@ namespace llvm {
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
@@ -438,6 +454,7 @@ namespace llvm {
TLSModel::Model model) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -454,6 +471,8 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
@@ -567,7 +586,6 @@ namespace llvm {
OtherModImm
};
-
namespace ARM {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index aafff98..59e9260 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2029,7 +2029,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// Same as N2V but not predicated.
class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
- string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
+ string Dt, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vm", "", pattern> {
bits<5> Vd;
@@ -2138,8 +2138,7 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, dag oops, dag iops,Format f, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDPatternOperator IntOp, bit Commutable, list<dag> pattern>
+ string OpcodeStr, string Dt, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr,
Dt, "$Vd, $Vn, $Vm", "", pattern> {
bits<5> Vd;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 75a109e..718d5da 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -991,6 +991,81 @@ def addrmode6oneL32 : Operand<i32>,
let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
}
+// Base class for addrmode6 with specific alignment restrictions.
+class AddrMode6Align : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+}
+
+// Special version of addrmode6 to handle no allowed alignment encoding for
+// VLD/VST instructions and checking the alignment is not specified.
+def AddrMode6AlignNoneAsmOperand : AsmOperandClass {
+ let Name = "AlignedMemoryNone";
+ let DiagnosticType = "AlignedMemoryRequiresNone";
+}
+def addrmode6alignNone : AddrMode6Align {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6AlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align16AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory16";
+ let DiagnosticType = "AlignedMemoryRequires16";
+}
+def addrmode6align16 : AddrMode6Align {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6Align16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align32AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory32";
+ let DiagnosticType = "AlignedMemoryRequires32";
+}
+def addrmode6align32 : AddrMode6Align {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6Align32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for
+// VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64";
+ let DiagnosticType = "AlignedMemoryRequires64";
+}
+def addrmode6align64 : AddrMode6Align {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6Align64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64or128AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128";
+ let DiagnosticType = "AlignedMemoryRequires64or128";
+}
+def addrmode6align64or128 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment
+// encoding for VLD/VST instructions and checking the alignment value.
+def AddrMode6Align64or128or256AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128or256";
+ let DiagnosticType = "AlignedMemoryRequires64or128or256";
+}
+def addrmode6align64or128or256 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128, 256 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128or256AsmOperand;
+}
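
[Annotation] Each of these operand classes restricts which ":<n>" alignment suffixes the assembler will accept on a NEON address so that, for example, "vld1.8 {d0}, [r0:64]" parses while an unsupported alignment gets the matching AlignedMemoryRequires* diagnostic. A sketch of the acceptance rule as a plain function (the table simply restates the comments above; class names are stand-ins):

    #include <cstdio>

    enum class Align6 { None, A16, A32, A64, A64or128, A64or128or256 };

    // Which ":<n>" alignment suffixes each addrmode6 operand class accepts.
    // bits == 0 means the suffix was omitted, which every class allows.
    bool acceptsAlignment(Align6 cls, unsigned bits) {
      if (bits == 0)
        return true;
      switch (cls) {
      case Align6::None:          return false;
      case Align6::A16:           return bits == 16;
      case Align6::A32:           return bits == 32;
      case Align6::A64:           return bits == 64;
      case Align6::A64or128:      return bits == 64 || bits == 128;
      case Align6::A64or128or256: return bits == 64 || bits == 128 || bits == 256;
      }
      return false;
    }

    int main() {
      std::printf("%d\n", acceptsAlignment(Align6::A64, 64));    // vld1.8 {d0}, [r0:64]
      std::printf("%d\n", acceptsAlignment(Align6::A64, 128));   // rejected for VLD1d8
    }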
+
// Special version of addrmode6 to handle alignment encoding for VLD-dup
// instructions, specifically VLD4-dup.
def addrmode6dup : Operand<i32>,
@@ -1003,6 +1078,69 @@ def addrmode6dup : Operand<i32>,
let ParserMatchClass = AddrMode6AsmOperand;
}
+// Base class for addrmode6dup with specific alignment restrictions.
+class AddrMode6DupAlign : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// Special version of addrmode6 to handle no allowed alignment encoding for
+// VLD-dup instruction and checking the alignment is not specified.
+def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemoryNone";
+ let DiagnosticType = "DupAlignedMemoryRequiresNone";
+}
+def addrmode6dupalignNone : AddrMode6DupAlign {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup
+// instruction and checking the alignment value.
+def AddrMode6dupAlign16AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory16";
+ let DiagnosticType = "DupAlignedMemoryRequires16";
+}
+def addrmode6dupalign16 : AddrMode6DupAlign {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup
+// instruction and checking the alignment value.
+def AddrMode6dupAlign32AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory32";
+ let DiagnosticType = "DupAlignedMemoryRequires32";
+}
+def addrmode6dupalign32 : AddrMode6DupAlign {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for VLD
+// instructions and checking the alignment value.
+def AddrMode6dupAlign64AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64";
+ let DiagnosticType = "DupAlignedMemoryRequires64";
+}
+def addrmode6dupalign64 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD instructions and checking the alignment value.
+def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64or128";
+ let DiagnosticType = "DupAlignedMemoryRequires64or128";
+}
+def addrmode6dupalign64or128 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand;
+}
+
// addrmodepc := pc + reg
//
def addrmodepc : Operand<i32>,
@@ -1689,7 +1827,8 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
- "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>,
+ Requires<[IsARM, HasV6]> {
bits<8> imm;
let Inst{27-8} = 0b00110010000011110000;
let Inst{7-0} = imm;
@@ -1702,8 +1841,6 @@ def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
-def : Pat<(int_arm_sevl), (HINT 5)>;
-
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
bits<4> Rd;
@@ -1830,6 +1967,18 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
let Inst{3-0} = opt;
}
+// A8.8.247 UDF - Undefined (Encoding A1)
+def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary,
+ "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-28} = 0b1110; // AL
+ let Inst{27-25} = 0b011;
+ let Inst{24-20} = 0b11111;
+ let Inst{19-8} = imm16{15-4};
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = imm16{3-0};
+}
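
[Annotation] The UDF record scatters its 16-bit immediate across two encoding fields (bits 19-8 and 3-0). A small sketch that assembles the A1 encoding word from an immediate by following the field layout above, which also shows why udf #0 comes out as the familiar 0xE7F000F0 word:

    #include <cstdint>
    #include <cstdio>

    // Assemble the ARM-mode UDF (encoding A1) word per the record above.
    uint32_t encodeUDF(uint16_t imm16) {
      uint32_t insn = 0;
      insn |= 0xEu << 28;                       // cond = AL
      insn |= 0x3u << 25;                       // bits 27-25 = 0b011
      insn |= 0x1Fu << 20;                      // bits 24-20 = 0b11111
      insn |= uint32_t(imm16 >> 4) << 8;        // bits 19-8  = imm16{15-4}
      insn |= 0xFu << 4;                        // bits 7-4   = 0b1111
      insn |= imm16 & 0xFu;                     // bits 3-0   = imm16{3-0}
      return insn;
    }

    int main() {
      std::printf("udf #0     -> %08x\n", encodeUDF(0));        // e7f000f0
      std::printf("udf #65535 -> %08x\n", encodeUDF(0xFFFF));   // e7ffffff
    }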
+
/*
* A5.4 Permanently UNDEFINED instructions.
*
@@ -2282,12 +2431,6 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr),
LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>,
Requires<[IsARM, HasV5TE]>;
-
- // GNU Assembler extension (compatibility)
- let isAsmParserOnly = 1 in
- def LDRD_PAIR : AI3ld<0b1101, 0, (outs GPRPairOp:$Rt), (ins addrmode3:$addr),
- LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $addr", []>,
- Requires<[IsARM, HasV5TE]>;
}
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2557,14 +2700,6 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]> {
let Inst{21} = 0;
}
-
- // GNU Assembler extension (compatibility)
- let isAsmParserOnly = 1 in
- def STRD_PAIR : AI3str<0b1111, (outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
- StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $addr", []>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{21} = 0;
- }
}
// Indexed stores
@@ -3999,6 +4134,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
Requires<[IsARM, HasV6]>,
Sched<[WriteALU]>;
+def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
+ (REV16 (LDRH addrmode3:$addr))>;
+def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
+ (STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+
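
[Annotation] These two patterns let a byte-swapped halfword load or store be selected as LDRH/STRH plus REV16. The identity they rely on: taking a 32-bit bswap of a value whose meaningful bits came from a halfword and shifting right by 16 is the same as swapping the two bytes of that halfword. A scalar sketch of both directions (assumes the GCC/Clang __builtin_bswap32 intrinsic):

    #include <cstdint>
    #include <cstdio>

    // (srl (bswap (extloadi16 addr)), 16)  ==  REV16 of the LDRH result.
    uint16_t loadBswap16(uint16_t halfFromMemory) {
      uint32_t ext = halfFromMemory;                  // LDRH-style zero extension
      return uint16_t(__builtin_bswap32(ext) >> 16);
    }

    // (truncstorei16 (srl (bswap rn), 16), addr)  ==  STRH of REV16(rn).
    uint16_t storeBswap16(uint32_t reg) {
      return uint16_t(__builtin_bswap32(reg) >> 16);  // halfword STRH will store
    }

    int main() {
      std::printf("%04x\n", loadBswap16(0x1234));        // 3412
      std::printf("%04x\n", storeBswap16(0xAABB1234));   // 3412
    }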
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
@@ -4816,7 +4956,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
Requires<[PreV8]>;
-def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
@@ -4824,7 +4964,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>,
Requires<[PreV8]>;
-def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0d46c49..b32b5d2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -39,6 +39,49 @@ def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmVMOVI32AsmOperand;
}
+
+def nImmVMOVI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16vmovByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMOVI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32vmovByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMVNI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16invByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+def nImmVMVNI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32invByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+
+def nImmVMOVI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
+}
+def nImmVMOVI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
+}
+def nImmVMVNI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
+}
+def nImmVMVNI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
+}
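
[Annotation] The new *ByteReplicate operand classes match vmov/vmvn immediates in which every byte of the i16/i32 value is the same byte, so the assembler can fall back to a byte-splat encoding for them. A sketch of that property as a plain predicate; this is a simplification inferred from the class names, and the real matcher presumably also filters degenerate values:

    #include <cstdint>
    #include <cstdio>

    // True when every byte of an i16/i32 immediate is identical, e.g. the kind
    // of value "vmov.i32 q0, #0xABABABAB" carries.
    bool isByteReplicate(uint32_t imm, unsigned bytes) {
      uint8_t b = imm & 0xFF;
      for (unsigned i = 1; i < bytes; ++i)
        if (((imm >> (8 * i)) & 0xFF) != b)
          return false;
      return true;
    }

    int main() {
      std::printf("%d\n", isByteReplicate(0xABAB, 2));       // i16 replicate
      std::printf("%d\n", isByteReplicate(0xABABABAB, 4));   // i32 replicate
      std::printf("%d\n", isByteReplicate(0x00AB00AB, 4));   // not a replicate
    }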
+
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
@@ -617,37 +660,37 @@ class VLDQQQQWBPseudo<InstrItinClass itin>
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string Dt>
+class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1,
+ (ins AddrMode:$Rn), IIC_VLD1,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-class VLD1Q<bits<4> op7_4, string Dt>
+class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x2,
+ (ins AddrMode:$Rn), IIC_VLD1x2,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
-def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
-def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
-def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
-def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
-def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
-def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
-multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1u,
+ (ins AddrMode:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -655,16 +698,16 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
-multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -672,7 +715,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -680,27 +723,27 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
}
}
-defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
-defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
-defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
-defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
-defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
-defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
-defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
-defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
-class VLD1D3<bits<4> op7_4, string Dt>
+class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -708,7 +751,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -716,32 +759,32 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
}
}
-def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
-def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
-def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
-def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
-defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
-defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
-defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
-defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
// ...with 4 registers
-class VLD1D4<bits<4> op7_4, string Dt>
+class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -749,7 +792,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -757,15 +800,15 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
}
}
-def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
-def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
-def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
-def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
-defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
-defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
-defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
-defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
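The hunks above thread an explicit AddrMode operand through the VLD1 classes so that each register-list size only accepts the alignment qualifiers its encoding allows (addrmode6align64, addrmode6align64or128, addrmode6align64or128or256). A minimal C++ sketch of that mapping follows; the helper name is hypothetical, and in the patch the actual check is performed by each operand class's asm-parser match class.

// Maps the size of a VLD1/VST1 register list to the alignment qualifiers the
// new operand classes accept.  AlignBits == 0 means no ":<align>" suffix,
// which is always legal.
static bool isValidVLD1Alignment(unsigned NumDRegs, unsigned AlignBits) {
  if (AlignBits == 0)
    return true;
  switch (NumDRegs) {
  case 1: case 3:                 // {d0} and {d0,d1,d2}: [rN:64] only
    return AlignBits == 64;
  case 2:                         // {d0,d1}: [rN:64] or [rN:128]
    return AlignBits == 64 || AlignBits == 128;
  case 4:                         // {d0..d3}: [rN:64], [rN:128] or [rN:256]
    return AlignBits == 64 || AlignBits == 128 || AlignBits == 256;
  default:
    return false;
  }
}
// Example: "vld1.8 {d0, d1}, [r0:256]" is rejected
// (isValidVLD1Alignment(2, 256) is false),
// while "vld1.8 {d0, d1}, [r0:128]" is accepted.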
@@ -773,22 +816,28 @@ def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
- InstrItinClass itin>
+ InstrItinClass itin, Operand AddrMode>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), itin,
+ (ins AddrMode:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
-def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
-def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
-def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
-def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
-def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
-def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
@@ -796,9 +845,9 @@ def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
- RegisterOperand VdTy, InstrItinClass itin> {
+ RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), itin,
+ (ins AddrMode:$Rn), itin,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -806,7 +855,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ (ins AddrMode:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -814,13 +863,19 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
}
}
-defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
-defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
-defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
-defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
-defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
-defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
@@ -830,12 +885,18 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
-def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
-def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
-defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
-defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
-defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1293,47 +1354,55 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
-class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
- (ins addrmode6dup:$Rn),
+ (ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
-def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
-def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
+ addrmode6dupalign32>;
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
- (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ (ins AddrMode:$Rn), IIC_VLD1dup,
"vld1", Dt, "$Vd, $Rn", "",
[(set VecListDPairAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
+ addrmode6dupalign32>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
-multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1342,17 +1411,17 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
}
-multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1361,7 +1430,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -1369,38 +1438,47 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
}
}
-defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
-defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
-defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
-defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
-defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
-defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
- (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ (ins AddrMode:$Rn), IIC_VLD2dup,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
+// HACK: VLD2DUPd8x2 must be changed together with VLD2b8, otherwise
+// "vld2.8 {d0[], d2[]}, [r4:32]" will turn into "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
// ...with address register writeback:
-multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ (ins AddrMode:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1409,7 +1487,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
def _register : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -1417,13 +1495,19 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
}
-defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
-defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
-defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
-defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
-defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
-defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
@@ -1449,22 +1533,22 @@ def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD3DUPWB<bits<4> op7_4, string Dt>
+class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
-def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
-def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
-def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
-def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
-def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
-def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
@@ -1560,35 +1644,35 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
"$addr.addr = $wb">;
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-class VST1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-def VST1d8 : VST1D<{0,0,0,?}, "8">;
-def VST1d16 : VST1D<{0,1,0,?}, "16">;
-def VST1d32 : VST1D<{1,0,0,?}, "32">;
-def VST1d64 : VST1D<{1,1,0,?}, "64">;
+def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
-def VST1q8 : VST1Q<{0,0,?,?}, "8">;
-def VST1q16 : VST1Q<{0,1,?,?}, "16">;
-def VST1q32 : VST1Q<{1,0,?,?}, "32">;
-def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
-multiclass VST1DWB<bits<4> op7_4, string Dt> {
+multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1596,7 +1680,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1604,9 +1688,9 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
-multiclass VST1QWB<bits<4> op7_4, string Dt> {
+multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1614,7 +1698,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1623,28 +1707,28 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
}
}
-defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
-defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
-defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
-defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
-defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
-defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
-defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
-defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
-class VST1D3<bits<4> op7_4, string Dt>
+class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ (ins AddrMode:$Rn, VecListThreeD:$Vd),
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1652,7 +1736,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1661,33 +1745,33 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
}
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
-defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
-defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
-defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
-defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
-class VST1D4<bits<4> op7_4, string Dt>
+class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1695,7 +1779,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1704,15 +1788,15 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
}
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
-defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
-defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
-defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
-defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
@@ -1720,21 +1804,27 @@ def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
- InstrItinClass itin>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ InstrItinClass itin, Operand AddrMode>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
-def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
-def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
-def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
-def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
-def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
-def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
@@ -1742,9 +1832,9 @@ def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
- RegisterOperand VdTy> {
+ RegisterOperand VdTy, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1752,16 +1842,16 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
}
-multiclass VST2QWB<bits<4> op7_4, string Dt> {
+multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1769,7 +1859,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1778,13 +1868,16 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
}
}
-defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
-defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
-defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
+ addrmode6align64or128>;
-defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
-defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
-defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
@@ -1794,12 +1887,18 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
-def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
-def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
-defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
-defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
-defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
+ addrmode6align64or128>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2267,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
- (VLD1q32 addrmode6:$addr)>;
+ (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q32 addrmode6:$addr, QPR:$value)>;
+ (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
@@ -2357,14 +2456,14 @@ class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
@@ -2372,7 +2471,7 @@ class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as N2VQIntXnp but with Vd as a src register.
@@ -2381,7 +2480,7 @@ class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}
@@ -2555,7 +2654,6 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -2609,7 +2707,6 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Same as N3VQIntnp but with Vd as a src register.
@@ -2618,8 +2715,8 @@ class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
- Dt, ResTy, OpTy, IntOp, Commutable,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
+ f, itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
@@ -2939,7 +3036,6 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -5245,6 +5341,35 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
+// Add support for byte-replicated immediates, for GAS compatibility.
+// E.g. the instructions below:
+// "vmov.i32 d0, 0xffffffff"
+// "vmov.i32 d0, 0xabababab"
+// "vmov.i16 d0, 0xabab"
+// cannot be encoded as written, but we can still accept them.
+// For the last two, for example, the assembler should emit:
+// "vmov.i8 d0, 0xab"
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+
+// Also add the same support for VMVN instructions, so that:
+// "vmvn.i32 d0, 0xabababab"
+// is emitted as:
+// "vmov.i8 d0, 0x54"
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
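The aliases added above let the assembler accept GAS-style byte-replicated immediates by re-encoding them as vmov.i8/vmvn.i8. A minimal C++ sketch of the replication test that the NEONi16/i32 ByteReplicate operand classes rely on is shown below; the helper name is hypothetical, and the in-tree predicates in ARMAsmParser.cpp may apply further restrictions (for instance on values that already have a native vmov.i16/i32 encoding).

#include <cstdint>

// Returns true when every one of the low NumBytes bytes of Value equals the
// lowest byte, i.e. the immediate is a pure byte replication such as
// 0xabab or 0xabababab.
static bool isByteReplicatedImm(uint64_t Value, unsigned NumBytes) {
  uint8_t Byte = Value & 0xff;
  for (unsigned I = 1; I != NumBytes; ++I)
    if (((Value >> (8 * I)) & 0xff) != Byte)
      return false;
  return true;
}

// isByteReplicatedImm(0xabababab, 4) -> true : accept "vmov.i32 d0, #0xabababab"
//                                              and emit "vmov.i8 d0, #0xab".
// isByteReplicatedImm(0xab01abab, 4) -> false: reject as before.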
@@ -5617,22 +5742,22 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
-def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
(VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
(VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
(VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
(VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
(VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
(VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
(VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
@@ -6051,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)),
//===----------------------------------------------------------------------===//
// bit_convert
-def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+}
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+}
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+}
-def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+}
+
+let Predicates = [IsBE] in {
+ // 64 bit conversions
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+
+ // 128 bit conversions
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+}
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
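The IsBE patterns above replace the no-op bitconverts with VREV because a bitcast must preserve the in-memory byte image, and on a big-endian target a plain re-labelling of the register bits would swap lanes relative to that image. Below is a small self-contained C++ model of the memory round-trip, under the simplifying assumption that lane 0 sits at the lower address in both byte orders; it is an illustration, not the lowering code itself.

#include <cstdint>
#include <cstdio>

// Models "store a v2i32, reload the same 8 bytes as an i64" for both byte
// orders.
static uint64_t bitcastViaMemory(uint32_t Lane0, uint32_t Lane1, bool BigEndian) {
  unsigned char Buf[8];
  for (int I = 0; I < 4; ++I) {
    Buf[I]     = BigEndian ? Lane0 >> (8 * (3 - I)) : Lane0 >> (8 * I);
    Buf[4 + I] = BigEndian ? Lane1 >> (8 * (3 - I)) : Lane1 >> (8 * I);
  }
  uint64_t V = 0;
  for (int I = 0; I < 8; ++I)
    V = BigEndian ? (V << 8) | Buf[I]
                  : V | (uint64_t)Buf[I] << (8 * I);
  return V;
}

int main() {
  // Little endian: lane 0 lands in the low half of the i64, so the bitcast is
  // a plain register re-labelling (the IsLE patterns above).
  // Big endian: lane 0 lands in the high half, i.e. the 32-bit halves swap,
  // which is the reordering a VREV64.32 performs.
  printf("LE: %016llx\n", (unsigned long long)bitcastViaMemory(0x11111111, 0x22222222, false));
  printf("BE: %016llx\n", (unsigned long long)bitcastViaMemory(0x11111111, 0x22222222, true));
}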
@@ -6120,7 +6323,7 @@ def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
-// Lengthen_Single<"8", "i16", "8"> =
+// Lengthen_Single<"8", "i16", "8"> =
// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0)))>;
@@ -6147,7 +6350,7 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
-// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
@@ -6257,7 +6460,7 @@ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
+ (VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
@@ -6311,379 +6514,442 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
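The switch from the generic addrmode6 operand to alignment-specific classes lets the assembler accept only the alignment qualifiers defined for each lane-indexed form. A hedged sketch of the corresponding UAL syntax (the exact diagnostics come from the operand predicates):

  vld1.8   {d0[3]}, [r0]                 @ byte lane: no ":align" allowed (addrmode6alignNone)
  vld1.16  {d0[2]}, [r0:16]              @ halfword lane: 16-bit alignment (addrmode6align16)
  vst1.32  {d0[1]}, [r0:32]!             @ word lane: 32-bit alignment, writeback
  vld2.32  {d0[1], d1[1]}, [r0:64], r2   @ two word lanes: 64-bit alignment (addrmode6align64)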
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
@@ -6691,168 +6957,190 @@ def VST3LNqWB_register_Asm_32 :
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
@@ -6860,168 +7148,202 @@ def VLD4LNqWB_register_Asm_32 :
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
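The multiple-structure and all-lanes forms get the same treatment with wider alignment sets, as the operand names spell out. A sketch of what they accept in UAL syntax, assuming the usual VLD4/VST4 alignment rules:

  vst4.8   {d0, d1, d2, d3}, [r0:256]          @ 64-, 128- or 256-bit alignment (addrmode6align64or128or256)
  vld4.16  {d0, d1, d2, d3}, [r0:128]!         @ same operand class, writeback form
  vld4.32  {d0[], d1[], d2[], d3[]}, [r0:128]  @ all-lanes .32: 64- or 128-bit (addrmode6dupalign64or128)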
// VMOV/VMVN takes an optional datatype suffix
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 754295f..e17f73a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -269,7 +269,8 @@ class T1SystemEncoding<bits<8> opc>
let Inst{7-0} = opc;
}
-def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>,
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm",
+ [(int_arm_hint imm0_15:$imm)]>,
T1SystemEncoding<0x00>,
Requires<[IsThumb, HasV6M]> {
bits<4> imm;
@@ -288,7 +289,6 @@ def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
let Predicates = [IsThumb2, HasV8];
}
-def : T2Pat<(int_arm_sevl), (tHINT 5)>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -1193,6 +1193,15 @@ def tTST : // A8.6.230
[(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
Sched<[WriteALU]>;
+// A8.8.247 UDF - Undefined (Encoding T1)
+def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
+ [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 {
+ bits<8> imm8;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1110;
+ let Inst{7-0} = imm8;
+}
+
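Given the fixed fields above, the T1 encoding is simply 0xDE00 with the immediate in the low byte. For example:

  udf  #0      @ 1101 1110 0000 0000 = 0xDE00
  udf  #0xAB   @ 0xDE00 | 0xAB       = 0xDEAB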
// Zero-extend byte
def tUXTB : // A8.6.262
T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1308,6 +1317,18 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+// Bswap 16 with load/store
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
+ (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rrs2:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_is2:$addr),
+ (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+
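These patterns fold a 16-bit byte swap into the adjacent load or store, so a halfword swap loaded from memory becomes LDRH plus REV16 instead of a shift-and-bswap sequence. A sketch of the Thumb1 output for a hypothetical __builtin_bswap16 of a loaded halfword:

  ldrh   r0, [r1]     @ load 0x0000AABB
  rev16  r0, r0       @ byte-swap within each halfword -> 0x0000BBAA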
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 387bd60..c30d6ab 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1445,7 +1445,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
// Store doubleword
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
- (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
@@ -1676,7 +1676,7 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
// pci variant is very similar to i12, but supports negative offsets
// from the PC. Only PLD and PLI have pci variants (not PLDW)
class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
- IIC_Preload, opc, "\t$addr",
+ IIC_Preload, opc, "\t$addr",
[(ARMPreload (ARMWrapper tconstpool:$addr),
(i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
@@ -1918,7 +1918,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
-def : t2InstAlias<"mov${p} $Rd, $imm",
+def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -2407,6 +2407,19 @@ def t2UBFX: T2TwoRegBitFI<
let Inst{15} = 0;
}
+// A8.8.247 UDF - Undefined (Encoding T2)
+def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16",
+ [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-29} = 0b111;
+ let Inst{28-27} = 0b10;
+ let Inst{26-20} = 0b1111111;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15} = 0b1;
+ let Inst{14-12} = 0b010;
+ let Inst{11-0} = imm16{11-0};
+}
+
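As with the T1 form, the fixed bits fall out of the fields above: 0xF7F0 in the first halfword, 0xA000 in the second, with imm16 split 4/12 between them. For example:

  udf.w  #0        @ 0xF7F0 0xA000
  udf.w  #0x1234   @ 0xF7F1 0xA234  (imm16{15:12} = 1, imm16{11:0} = 0x234)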
// A8.6.18 BFI - Bitfield insert (Encoding T1)
let Constraints = "$src = $Rd" in {
def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
@@ -3495,8 +3508,8 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
- let AsmMatchConverter = "cvtThumbBranches";
-}
+ let AsmMatchConverter = "cvtThumbBranches";
+}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
@@ -3671,7 +3684,8 @@ def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> {
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
+ [(int_arm_hint imm0_239:$imm)]> {
bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
let Inst{7-0} = imm;
@@ -3698,7 +3712,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
[]>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
@@ -4278,7 +4292,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 73c6eb7..8821c2d 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
@@ -25,6 +24,8 @@
#include <cstdlib>
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
}
@@ -319,13 +320,13 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
break;
}
case ARM::reloc_arm_movw: {
- ResultPtr = ResultPtr & 0xFFFF;
+ ResultPtr = ResultPtr & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
break;
}
case ARM::reloc_arm_movt: {
- ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
break;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 48e0bd7..ee7df54 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -42,6 +43,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-ldst-opt"
+
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
@@ -65,9 +68,10 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
+ const TargetLowering *TL;
ARMFunctionInfo *AFI;
RegScavenger *RS;
- bool isThumb2;
+ bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -93,7 +97,10 @@ namespace {
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
-
+ void UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base, unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -119,7 +126,6 @@ namespace {
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
-
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
@@ -159,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
+ case ARM::tLDRi:
+ // tLDMIA is writeback-only - unless the base register is in the input
+ // reglist.
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tLDMIA;
+ }
+ case ARM::tSTRi:
+ // There is no non-writeback tSTMIA either.
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tSTMIA_UPD;
+ }
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@@ -217,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::LDMIA_UPD:
case ARM::STMIA:
case ARM::STMIA_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
@@ -263,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace ARM_AM
} // end namespace llvm
+static bool isT1i32Load(unsigned Opc) {
+ return Opc == ARM::tLDRi;
+}
+
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+ return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
+}
+
+static bool isT1i32Store(unsigned Opc) {
+ return Opc == ARM::tSTRi;
}
static bool isT2i32Store(unsigned Opc) {
@@ -276,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
- return Opc == ARM::STRi12 || isT2i32Store(Opc);
+ return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
+}
+
+static unsigned getImmScale(unsigned Opc) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ return 1;
+ case ARM::tLDRHi:
+ case ARM::tSTRHi:
+ return 2;
+ case ARM::tLDRBi:
+ case ARM::tSTRBi:
+ return 4;
+ }
+}
+
+/// Update future uses of the base register with the offset introduced
+/// due to writeback. This function only works on Thumb1.
+void
+ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ assert(isThumb1 && "Can only update base register uses for Thumb1!");
+
+ // Start updating any instructions with immediate offsets. Insert a sub before
+ // the first non-updateable instruction (if any).
+ for (; MBBI != MBB.end(); ++MBBI) {
+ if (MBBI->readsRegister(Base)) {
+ unsigned Opc = MBBI->getOpcode();
+ int Offset;
+ bool InsertSub = false;
+
+ if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
+ Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
+ Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
+ // Loads and stores with immediate offsets can be updated, but only if
+ // the new offset isn't negative.
+ // The MachineOperand containing the offset immediate is the last one
+ // before predicates.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode
+ Offset = MO.getImm() - WordOffset * getImmScale(Opc);
+ if (Offset >= 0)
+ MO.setImm(Offset);
+ else
+ InsertSub = true;
+
+ } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
+ // SUB/ADD using this register. Merge it with the update.
+ // If the merged offset is too large, insert a new sub instead.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ Offset = (Opc == ARM::tSUBi8) ?
+ MO.getImm() + WordOffset * 4 :
+ MO.getImm() - WordOffset * 4;
+ if (TL->isLegalAddImmediate(Offset)) {
+ MO.setImm(Offset);
+ // The base register has now been reset, so exit early.
+ return;
+ } else {
+ InsertSub = true;
+ }
+
+ } else {
+ // Can't update the instruction.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
+ .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
+ return;
+ }
+ }
+
+ if (MBBI->killsRegister(Base))
+ // Register got killed. Stop updating.
+ return;
+ }
+
+ // The end of the block was reached. This means register liveness escapes the
+ // block, and it's necessary to insert a sub before the last instruction.
+ if (MBB.succ_size() > 0)
+ // But only insert the SUB if there is actually a successor block.
+ // FIXME: Check more carefully if register is live at this point, e.g. by
+ // also examining the successor block's register liveness information.
+ AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
+ .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
}
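The arithmetic above is easier to see in isolation. WordOffset is the writeback amount in words, while each later Thumb1 memory instruction encodes its immediate in units of its own access size, so the correction is WordOffset scaled by 4 divided by the access size, which is exactly what getImmScale returns. A standalone sketch of that rewrite decision follows; the enum and the helper names are made up for illustration and do not use the ARM::* opcodes:

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-ins for the ARM::tLDRi / tLDRHi / tLDRBi immediate forms.
    enum class Access { Word, Half, Byte };

    // Mirrors getImmScale(): immediate units per word of writeback.
    static unsigned immScale(Access A) {
      switch (A) {
      case Access::Word: return 1; // immediate counts words
      case Access::Half: return 2; // immediate counts halfwords
      case Access::Byte: return 4; // immediate counts bytes
      }
      assert(false && "unreachable");
      return 0;
    }

    // Returns the rewritten immediate, or -1 if a compensating tSUBi8 of
    // WordOffset * 4 bytes would have to be inserted instead (the immediate
    // would go negative, which Thumb1 loads/stores cannot encode).
    static int rewriteImm(Access A, int OldImm, unsigned WordOffset) {
      int NewImm = OldImm - (int)(WordOffset * immScale(A));
      return NewImm >= 0 ? NewImm : -1;
    }

    int main() {
      // After a merged LDM/STM wrote back 2 words (8 bytes) to the base register:
      std::printf("%d\n", rewriteImm(Access::Word, 3, 2)); // ldr  #3 -> #1
      std::printf("%d\n", rewriteImm(Access::Half, 4, 2)); // ldrh #4 -> #0
      std::printf("%d\n", rewriteImm(Access::Byte, 4, 2)); // ldrb #4 -> needs a sub
      return 0;
    }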
/// MergeOps - Create and insert a LDM or STM with Base as base register and
@@ -296,18 +423,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- // VFP and Thumb2 do not support IB or DA modes.
+ // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- bool haveIBAndDA = isNotVFP && !isThumb2;
- if (Offset == 4 && haveIBAndDA)
+ bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
+
+ if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib;
- else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da;
- else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- else if (Offset != 0) {
- // Check if this is a supported opcode before we insert instructions to
+ } else if (Offset != 0) {
+ // Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
@@ -318,41 +446,98 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
unsigned NewBase;
- if (isi32Load(Opcode))
+ if (isi32Load(Opcode)) {
// If it is a load, then just use one of the destination registers as
// the new base.
NewBase = Regs[NumRegs-1].first;
- else {
+ } else {
// Use the scratch register as the new base.
NewBase = Scratch;
if (NewBase == 0)
return false;
}
- int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+
+ int BaseOpc =
+ isThumb2 ? ARM::t2ADDri :
+ isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+
if (Offset < 0) {
- BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ BaseOpc =
+ isThumb2 ? ARM::t2SUBri :
+ isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
Offset = - Offset;
}
- int ImmedOffset = isThumb2
- ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
- if (ImmedOffset == -1)
- // FIXME: Try t2ADDri12 or t2SUBri12?
- return false; // Probably not worth it then.
-
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+
+ if (!TL->isLegalAddImmediate(Offset))
+ // FIXME: Try add with register operand?
+ return false; // Probably not worth it then.
+
+ if (isThumb1) {
+ if (Base != NewBase) {
+ // Need to insert a MOV to the new base first.
+ // FIXME: If the immediate fits in 3 bits, use ADD instead.
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ }
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
+ .addReg(NewBase, getKillRegState(true)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ }
+
Base = NewBase;
- BaseKill = true; // New base is always killed right its use.
+ BaseKill = true; // New base is always killed straight away.
}
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
+
+ // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
+ // base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback. Check for this.
+ if (Opcode == ARM::tLDMIA && isThumb1)
+ for (unsigned I = 0; I < NumRegs; ++I)
+ if (Base == Regs[I].first) {
+ Writeback = false;
+ break;
+ }
+
+ MachineInstrBuilder MIB;
+
+ if (Writeback) {
+ if (Opcode == ARM::tLDMIA)
+ // Update tLDMIA with writeback if necessary.
+ Opcode = ARM::tLDMIA_UPD;
+
+ // The base isn't dead after a merged instruction with writeback. Update
+ // future uses of the base with the added offset (if possible), or reset
+ // the base register as necessary.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+
+ // Thumb1: we might need to set base writeback when building the MI.
+ MIB.addReg(Base, getDefRegState(true))
+ .addReg(Base, getKillRegState(BaseKill));
+ } else {
+ // No writeback, simply build the MachineInstr.
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB.addReg(Base, getKillRegState(BaseKill));
+ }
+
+ MIB.addImm(Pred).addReg(PredReg);
+
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
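For reference, the writeback exception handled above reduces to a membership test: a Thumb1 LDM only skips base writeback when the base register is also one of the destination registers, and Thumb1 stores always write the base back. A minimal sketch of that decision (plain unsigned register ids, not the LLVM register classes; the function name is illustrative):

    #include <cstdio>
    #include <vector>

    // Thumb1 LDM/STM always write the base back, except that an LDM whose
    // register list contains the base register is the non-writeback form.
    static bool needsBaseWriteback(bool IsLoad, unsigned Base,
                                   const std::vector<unsigned> &Regs) {
      if (IsLoad)
        for (unsigned R : Regs)
          if (R == Base)
            return false; // the base is reloaded by the LDM itself
      return true;
    }

    int main() {
      std::printf("%d\n", needsBaseWriteback(true, 0, {1, 2}));  // ldmia r0!, {r1, r2}
      std::printf("%d\n", needsBaseWriteback(true, 0, {0, 1}));  // base in list: no writeback
      std::printf("%d\n", needsBaseWriteback(false, 0, {1, 2})); // stores always write back
      return 0;
    }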
@@ -492,7 +677,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// affected uses.
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
E = UsesOfImpDefs.end();
- I != E; ++I)
+ I != E; ++I)
(*I)->setIsUndef();
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
@@ -589,7 +774,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- return;
}
static bool definesCPSR(MachineInstr *MI) {
@@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
@@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
+ MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
@@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits.
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
+ MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
default: return 0;
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2STMIA:
@@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 is already using updating loads/stores.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill();
@@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
const TargetInstrInfo *TII,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 doesn't have updating LDR/STR.
+ // FIXME: Use LDM/STM with single register instead.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
@@ -1002,7 +1202,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
if (isAM5) {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
@@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField;
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ return OffField * 4;
+
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
: ARM_AM::getAM5Offset(OffField) * 4;
if (isAM3) {
@@ -1408,16 +1614,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (MBBI == E)
// Reach the end of the block, try merging the memory instructions.
TryMerge = true;
- } else
+ } else {
TryMerge = true;
+ }
if (TryMerge) {
if (NumMemOps > 1) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
+
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
+ unsigned Scratch =
+ RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
+
// Process the load / store instructions.
RS->forward(std::prev(MBBI));
@@ -1483,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ // Thumb1 LDM doesn't allow high registers.
+ if (isThumb1) return false;
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -1513,12 +1725,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
+ TL = TM.getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
+ isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
@@ -1666,11 +1880,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDRi12)
+ if (Opcode == ARM::LDRi12) {
NewOpc = ARM::LDRD;
- else if (Opcode == ARM::STRi12)
+ } else if (Opcode == ARM::STRi12) {
NewOpc = ARM::STRD;
- else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
NewOpc = ARM::t2LDRDi8;
Scale = 4;
isT2 = true;
@@ -1678,8 +1892,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
NewOpc = ARM::t2STRDi8;
Scale = 4;
isT2 = true;
- } else
+ } else {
return false;
+ }
// Make sure the base address satisfies i64 ld / st alignment requirement.
// At the moment, we ignore the memoryoperand's value.
@@ -1746,8 +1961,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
while (Ops.size() > 1) {
unsigned FirstLoc = ~0U;
unsigned LastLoc = 0;
- MachineInstr *FirstOp = 0;
- MachineInstr *LastOp = 0;
+ MachineInstr *FirstOp = nullptr;
+ MachineInstr *LastOp = nullptr;
int LastOffset = 0;
unsigned LastOpcode = 0;
unsigned LastBytes = 0;
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 20619fa..2a49255 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -8,8 +8,6 @@
//
//===------------------------------------------------------------------------------------------===//
-#define DEBUG_TYPE "double barriers"
-
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
@@ -17,6 +15,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
+#define DEBUG_TYPE "double barriers"
+
STATISTIC(NumDMBsRemoved, "Number of DMBs removed");
namespace {
@@ -25,9 +25,9 @@ public:
static char ID;
ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "optimise barriers pass";
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 7f0fe05..b290e7f 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -116,13 +116,13 @@ def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
}
// VFP3 defines 16 additional double registers
-def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
-def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
@@ -158,11 +158,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Current Program Status Register.
// We model fpscr with two registers: FPSCR models the control bits and will be
// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV
-// models the APSR when it's accessed by some special instructions. In such cases
+// models the APSR when it's accessed by some special instructions. In such cases
// it has the same encoding as PC.
def CPSR : ARMReg<0, "cpsr">;
def APSR : ARMReg<1, "apsr">;
-def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
+def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
def SPSR : ARMReg<2, "spsr">;
def FPSCR : ARMReg<3, "fpscr">;
def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 0ace9bc..57d0bfb 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -93,7 +93,7 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
-
+
// Integer load pipeline
//
// Immediate offset
@@ -181,7 +181,7 @@ def ARMV6Itineraries : ProcessorItineraries<
//
// Store multiple + update
InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
-
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index ba3cf4d..008ad64 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-selectiondag-info"
+
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
: TargetSelectionDAGInfo(TM),
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
@@ -52,9 +53,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
EVT VT = MVT::i32;
unsigned VTSize = 4;
unsigned i = 0;
- const unsigned MAX_LOADS_IN_LDM = 6;
- SDValue TFOps[MAX_LOADS_IN_LDM];
- SDValue Loads[MAX_LOADS_IN_LDM];
+ // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
+ const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+ SDValue TFOps[6];
+ SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
// Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
@@ -71,7 +73,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
@@ -82,7 +85,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
isVolatile, false, 0);
DstOff += VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
EmittedNumMemOps += i;
}
@@ -112,7 +116,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcOff += VTSize;
BytesLeft -= VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
i = 0;
BytesLeft = BytesLeftSave;
@@ -133,7 +138,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
DstOff += VTSize;
BytesLeft -= VTSize;
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
}
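One way to see the effect of the smaller Thumb1 batch size above: the inline expansion emits roughly ceil(words / MAX_LOADS_IN_LDM) load/store bursts, so a Thumb1 target simply takes more bursts for the same copy. The sketch below shows only that batching arithmetic, not the DAG lowering itself; numBursts is a made-up name:

    #include <cstdio>

    // Number of LDM/STM-style bursts the inline memcpy expansion needs for
    // `Bytes` bytes, given a per-burst load limit (4 on Thumb1, 6 otherwise).
    // Trailing bytes that are not a multiple of 4 are handled separately.
    static unsigned numBursts(unsigned Bytes, bool IsThumb1) {
      const unsigned MaxLoads = IsThumb1 ? 4 : 6;
      unsigned Words = Bytes / 4;
      return (Words + MaxLoads - 1) / MaxLoads; // ceiling division
    }

    int main() {
      std::printf("48-byte copy, ARM/Thumb2: %u bursts\n", numBursts(48, false)); // 2
      std::printf("48-byte copy, Thumb1:     %u bursts\n", numBursts(48, true));  // 3
      return 0;
    }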
// Adjust parameters for memset, EABI uses format (ptr, size, value),
@@ -146,7 +152,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
// Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO())
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() ||
+ Subtarget->isTargetWindows())
return SDValue();
const ARMTargetLowering &TLI =
@@ -179,22 +186,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
Args.push_back(Entry);
// Emit __eabi_memset call
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()), // return type
- false, // return sign ext
- false, // return zero ext
- false, // is var arg
- false, // is in regs
- 0, // number of fixed arguments
- TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
- false, // is tail call
- false, // does not return
- false, // is return val used
- DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), // callee
- Args, DAG, dl);
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(CLI);
-
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), &Args, 0)
+ .setDiscardResult();
+
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
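The operand shuffling in this function exists because the AAPCS run-time helper takes its arguments in a different order from ISO C memset, which is why the size and value entries are swapped before the libcall is emitted. The sketch below shows that argument order with a host-side stand-in so it links and runs anywhere; on a real AAPCS target __aeabi_memset comes from the platform runtime, and this is only an illustration of the calling convention, not part of the patch:

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    // AAPCS run-time helper: same job as ISO C memset, but with the
    // (ptr, size, value) argument order and no return value. Defined here
    // only so the example links on a host.
    extern "C" void __aeabi_memset(void *dest, size_t n, int c) {
      std::memset(dest, c, n); // note the reordered operands
    }

    int main() {
      char Buf[8];
      // The lowering above swaps the size/value operands exactly like this
      // before emitting the call.
      __aeabi_memset(Buf, sizeof(Buf), 'x');
      std::printf("%.8s\n", Buf);
      return 0;
    }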
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 73e2018..5b204f6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -21,12 +21,14 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "ARMGenSubtargetInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3855419..38536b2 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -31,7 +31,7 @@ class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
+ Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
CortexR5, Swift, CortexA53, CortexA57, Krait
};
enum ARMProcClassEnum {
@@ -242,9 +242,7 @@ protected:
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
- // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
- // Change this once Thumb1 ldmia / stmia support is added.
- return isThumb1Only() ? 0 : 64;
+ return 64;
}
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -396,7 +394,7 @@ public:
bool isLittle() const { return IsLittle; }
unsigned getMispredictionPenalty() const;
-
+
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 4ae539a..8876227 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -228,7 +228,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
- addPass(createARMAtomicExpandPass(TM));
+ addPass(createAtomicExpandLoadLinkedPass(TM));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
@@ -247,8 +247,7 @@ bool ARMPassConfig::addInstSelector() {
}
bool ARMPassConfig::addPreRegAlloc() {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
addPass(createMLxExpansionPass());
@@ -262,12 +261,10 @@ bool ARMPassConfig::addPreRegAlloc() {
}
bool ARMPassConfig::addPreSched2() {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- addPass(createARMLoadStoreOptimizationPass());
- printAndVerify("After ARM load / store optimizer");
- }
+ addPass(createARMLoadStoreOptimizationPass());
+ printAndVerify("After ARM load / store optimizer");
+
if (getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 0c80a95..664c992 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -23,7 +23,6 @@
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
@@ -102,7 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
/// ARMLETargetMachine - ARM little endian target machine.
///
class ARMLETargetMachine : public ARMTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ARMLETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -113,7 +112,7 @@ public:
/// ARMBETargetMachine - ARM big endian target machine.
///
class ARMBETargetMachine : public ARMTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ARMBETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -128,12 +127,12 @@ public:
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
- OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
- OwningPtr<ARMFrameLowering> FrameLowering;
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -169,7 +168,7 @@ public:
/// ThumbLETargetMachine - Thumb little endian target machine.
///
class ThumbLETargetMachine : public ThumbTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ThumbLETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -180,10 +179,10 @@ public:
/// ThumbBETargetMachine - Thumb big endian target machine.
///
class ThumbBETargetMachine : public ThumbTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
- ThumbBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3379f85..48238bf 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -11,6 +11,7 @@
#include "ARMSubtarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
@@ -31,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(isAAPCS_ABI);
if (isAAPCS_ABI) {
- LSDASection = NULL;
+ LSDASection = nullptr;
}
AttributesSection =
@@ -45,6 +46,10 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
MCStreamer &Streamer) const {
+ if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM)
+ return TargetLoweringObjectFileELF::getTTypeGlobalReference(
+ GV, Encoding, Mang, TM, MMI, Streamer);
+
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang),
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 5f8d612..c926421 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -23,7 +23,7 @@ protected:
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
- AttributesSection(NULL)
+ AttributesSection(nullptr)
{}
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d3b43cd..57df7da 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -14,7 +14,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "armtti"
#include "ARM.h"
#include "ARMTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -23,8 +22,10 @@
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "armtti"
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeARMTTIPass(PassRegistry &);
@@ -42,7 +43,7 @@ class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 4be95aa..095955b 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -17,7 +17,6 @@ arm_codegen_TBLGEN_TABLES := \
arm_codegen_SRC_FILES := \
A15SDOptimizer.cpp \
ARMAsmPrinter.cpp \
- ARMAtomicExpandPass.cpp \
ARMBaseInstrInfo.cpp \
ARMBaseRegisterInfo.cpp \
ARMCodeEmitter.cpp \
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 9c57a24..5cdf394 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -13,7 +13,6 @@
#include "MCTargetDesc/ARMArchName.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -23,9 +22,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -345,7 +342,8 @@ public:
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
@@ -416,7 +414,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_Token
} Kind;
- SMLoc StartLoc, EndLoc;
+ SMLoc StartLoc, EndLoc, AlignmentLoc;
SmallVector<unsigned, 8> Registers;
struct CCOp {
@@ -633,6 +631,12 @@ public:
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+ /// getAlignmentLoc - Get the location of the Alignment token of this operand.
+ SMLoc getAlignmentLoc() const {
+ assert(Kind == k_Memory && "Invalid access!");
+ return AlignmentLoc;
+ }
+
ARMCC::CondCodes getCondCode() const {
assert(Kind == k_CondCode && "Invalid access!");
return CC.Val;
@@ -1089,12 +1093,12 @@ public:
bool isPostIdxReg() const {
return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
- bool isMemNoOffset(bool alignOK = false) const {
+ bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const {
if (!isMem())
return false;
// No offset of any kind.
- return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
- (alignOK || Memory.Alignment == 0);
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
}
bool isMemPCRelImm12() const {
if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1110,6 +1114,65 @@ public:
bool isAlignedMemory() const {
return isMemNoOffset(true);
}
+ bool isAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128or256() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16.
+ return true;
+ if (isMemNoOffset(false, 32)) // alignment in bytes for 256-bits is 32.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
bool isAddrMode2() const {
if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
@@ -1545,7 +1608,10 @@ public:
}
bool isNEONi16splat() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(2))
+ return false; // Leave that for byte replication and forbid by default.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
@@ -1555,7 +1621,10 @@ public:
}
bool isNEONi32splat() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(4))
+ return false; // Leave that for byte replication and forbid by default.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
@@ -1567,11 +1636,36 @@ public:
(Value >= 0x01000000 && Value <= 0xff000000);
}
+ bool isNEONByteReplicate(unsigned NumBytes) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ if (!Value)
+ return false; // Don't bother with zero.
+
+ unsigned char B = Value & 0xff;
+ for (unsigned i = 1; i < NumBytes; ++i) {
+ Value >>= 8;
+ if ((Value & 0xff) != B)
+ return false;
+ }
+ return true;
+ }
+ bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); }
+ bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); }
bool isNEONi32vmov() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(4))
+ return false; // Let it be classified as a byte-replicate case.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
- if (!CE) return false;
+ if (!CE)
+ return false;
int64_t Value = CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
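The new isNEONByteReplicate check above only asks whether the value is non-zero and its low NumBytes bytes all repeat the same byte. A standalone version of the same test, outside the MC layer (isByteReplicate is an illustrative name):

    #include <cstdint>
    #include <cstdio>

    // Same predicate as isNEONByteReplicate above: true when the low NumBytes
    // bytes of Value are all equal to its low byte and Value is non-zero.
    static bool isByteReplicate(int64_t Value, unsigned NumBytes) {
      if (!Value)
        return false;               // zero is handled by other immediate forms
      unsigned char B = Value & 0xff;
      for (unsigned i = 1; i < NumBytes; ++i) {
        Value >>= 8;
        if ((Value & 0xff) != B)
          return false;
      }
      return true;
    }

    int main() {
      std::printf("%d\n", isByteReplicate(0x4242, 2));     // 1: vmov.i16 candidate
      std::printf("%d\n", isByteReplicate(0xdddddddd, 4)); // 1: vmov.i32 candidate
      std::printf("%d\n", isByteReplicate(0x00ff0000, 4)); // 0: not a replicated byte
      return 0;
    }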
@@ -1612,7 +1706,7 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
- if (Expr == 0)
+ if (!Expr)
Inst.addOperand(MCOperand::CreateImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
@@ -1926,6 +2020,50 @@ public:
Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
}
+ void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
@@ -2275,6 +2413,19 @@ public:
Inst.addOperand(MCOperand::CreateImm(Value));
}
+ void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All vmvn instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = ((~Value) & 0xff);
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::CreateImm(B));
+ }
void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
@@ -2289,6 +2440,19 @@ public:
Inst.addOperand(MCOperand::CreateImm(Value));
}
+ void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = Value & 0xff;
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::CreateImm(B));
+ }
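Both helpers above pack the same NEON modified-immediate form: assuming the cmode:imm8 packing implied by the 0xe00 mask, cmode 0b1110 sits in bits 11-8 and the replicated byte in bits 7-0, with the VMVN path complementing the byte first so the instruction can be emitted as a plain VMOV. A standalone sketch of the packed operand value (packByteReplicate is an illustrative name, not the patch's API):

    #include <cstdint>
    #include <cstdio>

    // Pack the cmode/imm8 operand used above: cmode = 0b1110 (replicate one
    // byte to every byte of the element), imm8 = the byte itself.
    static unsigned packByteReplicate(uint32_t Value, bool IsVMVN) {
      unsigned B = Value & 0xff;
      if (IsVMVN)
        B = (~B) & 0xff;  // vmvn of a replicated byte == vmov of its complement
      return 0xe00 | B;   // cmode (0b1110) in bits 11-8, imm8 in bits 7-0
    }

    int main() {
      // vmov.i32 q0, #0xdddddddd -> operand 0xEDD
      std::printf("0x%03X\n", packByteReplicate(0xdddddddd, false));
      // vmvn.i32 q0, #0x22222222 -> same bytes as vmov of 0xDDDDDDDD -> 0xEDD
      std::printf("0x%03X\n", packByteReplicate(0x22222222, true));
      return 0;
    }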
void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
@@ -2523,7 +2687,8 @@ public:
unsigned ShiftImm,
unsigned Alignment,
bool isNegative,
- SMLoc S, SMLoc E) {
+ SMLoc S, SMLoc E,
+ SMLoc AlignmentLoc = SMLoc()) {
ARMOperand *Op = new ARMOperand(k_Memory);
Op->Memory.BaseRegNum = BaseRegNum;
Op->Memory.OffsetImm = OffsetImm;
@@ -2534,6 +2699,7 @@ public:
Op->Memory.isNegative = isNegative;
Op->StartLoc = S;
Op->EndLoc = E;
+ Op->AlignmentLoc = AlignmentLoc;
return Op;
}
@@ -2806,7 +2972,7 @@ int ARMAsmParser::tryParseShiftRegister(
// The source register for the shift has already been added to the
// operand list, so we need to pop it off and combine it into the shifted
// register operand instead.
- OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
@@ -2825,7 +2991,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
- const MCExpr *ShiftExpr = 0;
+ const MCExpr *ShiftExpr = nullptr;
if (getParser().parseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
@@ -2855,12 +3021,12 @@ int ARMAsmParser::tryParseShiftRegister(
EndLoc = Parser.getTok().getEndLoc();
ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
- Error (L, "expected immediate or register in shift operand");
+ Error(L, "expected immediate or register in shift operand");
return -1;
}
} else {
- Error (Parser.getTok().getLoc(),
- "expected immediate or register in shift operand");
+ Error(Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
return -1;
}
}
@@ -4323,8 +4489,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
- 0, 0, false, S, E));
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
+ ARM_AM::no_shift, 0, 0, false,
+ S, E));
// If there's a pre-indexing writeback marker, '!', just add it as a token
// operand. It's rather odd, but syntactically valid.
@@ -4346,6 +4513,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Parser.getTok().is(AsmToken::Colon)) {
Parser.Lex(); // Eat the ':'.
E = Parser.getTok().getLoc();
+ SMLoc AlignmentLoc = Tok.getLoc();
const MCExpr *Expr;
if (getParser().parseExpression(Expr))
@@ -4378,9 +4546,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Don't worry about range checking the value here. That's handled by
// the is*() predicates.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
ARM_AM::no_shift, 0, Align,
- false, S, E));
+ false, S, E, AlignmentLoc));
// If there's a pre-indexing writeback marker, '!', just add it as a token
// operand.
@@ -4471,7 +4639,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum,
ShiftType, ShiftImm, 0, isNegative,
S, E));
@@ -4926,8 +5094,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
- Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") ||
- Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
+ Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
+ Mnemonic.startswith("vsel") ||
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
@@ -5404,21 +5573,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// GNU Assembler extension (compatibility)
- if ((Mnemonic == "ldrd" || Mnemonic == "strd") && !isThumb() &&
- Operands.size() == 4) {
- ARMOperand *Op = static_cast<ARMOperand *>(Operands[2]);
- assert(Op->isReg() && "expected register argument");
+ if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
+ ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]);
+ ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]);
+ if (Op3->isMem()) {
+ assert(Op2->isReg() && "expected register argument");
- unsigned SuperReg = MRI->getMatchingSuperReg(
- Op->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+ unsigned SuperReg = MRI->getMatchingSuperReg(
+ Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
- assert(SuperReg && "expected register pair");
+ assert(SuperReg && "expected register pair");
- unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
+ unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
- Operands.insert(Operands.begin() + 3,
- ARMOperand::CreateReg(PairedReg, Op->getStartLoc(),
- Op->getEndLoc()));
+ Operands.insert(Operands.begin() + 3,
+ ARMOperand::CreateReg(PairedReg,
+ Op2->getStartLoc(),
+ Op2->getEndLoc()));
+ }
}
// FIXME: As said above, this is all a pretty gross hack. This instruction
@@ -5748,6 +5920,30 @@ validateInstruction(MCInst &Inst,
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
+ case ARM::MOVi16:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ {
+ // We want to avoid misleadingly allowing something like "mov r0, <symbol>"
+ // especially when we turn it into a movw and the expression <symbol> does
+ // not have a :lower16: or :upper16: as part of the expression. We don't
+ // want the behavior of silently truncating, which can be unexpected and
+ // lead to bugs that are difficult to find since this is an easy mistake
+ // to make.
+ int i = (Operands[3]->isImm()) ? 3 : 4;
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE) break;
+ const MCExpr *E = dyn_cast<MCExpr>(Op->getImm());
+ if (!E) break;
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
+ if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) {
+ return Error(Op->getStartLoc(),
+ "immediate expression for mov requires :lower16: or :upper16");
+ break;
+ }
+ }
}
return false;
@@ -5898,7 +6094,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD;
- case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
@@ -7860,9 +8056,11 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
-template<> inline bool IsCPSRDead<MCInst>(MCInst* Instr) {
+namespace llvm {
+template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
return true; // In an assembly source, no need to second-guess
}
+}
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
@@ -7965,6 +8163,42 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ case Match_AlignedMemoryRequires64or128or256:
+ {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ switch (MatchResult) {
+ default:
+ llvm_unreachable("Missing Match_Aligned type");
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ return Error(ErrorLoc, "alignment must be omitted");
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ return Error(ErrorLoc, "alignment must be 16 or omitted");
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ return Error(ErrorLoc, "alignment must be 32 or omitted");
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ return Error(ErrorLoc, "alignment must be 64 or omitted");
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ return Error(ErrorLoc, "alignment must be 64, 128 or omitted");
+ case Match_AlignedMemoryRequires64or128or256:
+ return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted");
+ }
+ }
}
llvm_unreachable("Implement any new match types added!");
@@ -7972,6 +8206,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/// parseDirective parses the arm specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ const MCObjectFileInfo::Environment Format =
+ getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return parseLiteralValues(4, DirectiveID.getLoc());
@@ -7989,16 +8227,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveSyntax(DirectiveID.getLoc());
else if (IDVal == ".unreq")
return parseDirectiveUnreq(DirectiveID.getLoc());
- else if (IDVal == ".arch")
- return parseDirectiveArch(DirectiveID.getLoc());
- else if (IDVal == ".eabi_attribute")
- return parseDirectiveEabiAttr(DirectiveID.getLoc());
- else if (IDVal == ".cpu")
- return parseDirectiveCPU(DirectiveID.getLoc());
- else if (IDVal == ".fpu")
- return parseDirectiveFPU(DirectiveID.getLoc());
- else if (IDVal == ".fnstart")
- return parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".fnend")
return parseDirectiveFnEnd(DirectiveID.getLoc());
else if (IDVal == ".cantunwind")
@@ -8015,12 +8243,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveRegSave(DirectiveID.getLoc(), false);
else if (IDVal == ".vsave")
return parseDirectiveRegSave(DirectiveID.getLoc(), true);
- else if (IDVal == ".inst")
- return parseDirectiveInst(DirectiveID.getLoc());
- else if (IDVal == ".inst.n")
- return parseDirectiveInst(DirectiveID.getLoc(), 'n');
- else if (IDVal == ".inst.w")
- return parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".ltorg" || IDVal == ".pool")
return parseDirectiveLtorg(DirectiveID.getLoc());
else if (IDVal == ".even")
@@ -8029,18 +8251,38 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectivePersonalityIndex(DirectiveID.getLoc());
else if (IDVal == ".unwind_raw")
return parseDirectiveUnwindRaw(DirectiveID.getLoc());
- else if (IDVal == ".tlsdescseq")
- return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
else if (IDVal == ".movsp")
return parseDirectiveMovSP(DirectiveID.getLoc());
- else if (IDVal == ".object_arch")
- return parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".arch_extension")
return parseDirectiveArchExtension(DirectiveID.getLoc());
else if (IDVal == ".align")
return parseDirectiveAlign(DirectiveID.getLoc());
else if (IDVal == ".thumb_set")
return parseDirectiveThumbSet(DirectiveID.getLoc());
+
+ if (!IsMachO) {
+ if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
+ else if (IDVal == ".fnstart")
+ return parseDirectiveFnStart(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ return parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'w');
+ else if (IDVal == ".object_arch")
+ return parseDirectiveObjectArch(DirectiveID.getLoc());
+ else if (IDVal == ".tlsdescseq")
+ return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
+ }
+
return true;
}
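With this reordering the object-format check happens once, up front: for non-Mach-O targets the build-attribute, .inst and unwinding setup directives keep working, while for Mach-O they now fall through and are reported by the generic parser as unknown directives instead of each handler rejecting Mach-O itself. A small illustrative input:

    .eabi_attribute 6, 10    @ ELF: Tag_CPU_arch = v7; Mach-O: now an unknown directive
    .fnstart                 @ ELF: opens an unwind region; Mach-O: now an unknown directive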
@@ -8121,32 +8363,6 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
- return;
- }
-
- if (!isThumb())
- return;
-
- const MCObjectFileInfo::Environment Format =
- getContext().getObjectFileInfo()->getObjectFileType();
- switch (Format) {
- case MCObjectFileInfo::IsCOFF: {
- const MCSymbolData &SD =
- getParser().getStreamer().getOrCreateSymbolData(Symbol);
- char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- if (SD.getFlags() & (Type << COFF::SF_TypeShift))
- getParser().getStreamer().EmitThumbFunc(Symbol);
- break;
- }
- case MCObjectFileInfo::IsELF: {
- const MCSymbolData &SD =
- getParser().getStreamer().getOrCreateSymbolData(Symbol);
- if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
- getParser().getStreamer().EmitThumbFunc(Symbol);
- break;
- }
- case MCObjectFileInfo::IsMachO:
- break;
}
}
@@ -8303,14 +8519,6 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
/// parseDirectiveArch
/// ::= .arch token
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".arch directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
unsigned ID = StringSwitch<unsigned>(Arch)
@@ -8334,14 +8542,6 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// ::= .eabi_attribute int, int [, "str"]
/// ::= .eabi_attribute Tag_name, int [, "str"]
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".eabi_attribute directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
int64_t Tag;
SMLoc TagLoc;
TagLoc = Parser.getTok().getLoc();
@@ -8447,14 +8647,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
/// parseDirectiveCPU
/// ::= .cpu str
bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".cpu directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef CPU = getParser().parseStringToEndOfStatement().trim();
getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
return false;
@@ -8463,14 +8655,6 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".fpu directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = StringSwitch<unsigned>(FPU)
@@ -8490,14 +8674,6 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".fnstart directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (UC.hasFnStart()) {
Error(L, ".fnstart starts before the end of previous one");
UC.emitFnStartLocNotes();
@@ -8777,14 +8953,6 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(Loc, ".inst directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
int Width;
if (isThumb()) {
@@ -9033,14 +9201,6 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
/// parseDirectiveTLSDescSeq
/// ::= .tlsdescseq tls-variable
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".tlsdescseq directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (getLexer().isNot(AsmToken::Identifier)) {
TokError("expected variable after '.tlsdescseq' directive");
Parser.eatToEndOfStatement();
@@ -9128,14 +9288,6 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// parseDirectiveObjectArch
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".object_arch directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
@@ -9221,36 +9373,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
Lex();
MCSymbol *Alias = getContext().GetOrCreateSymbol(Name);
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
- MCSymbol *Sym = getContext().LookupSymbol(SRE->getSymbol().getName());
- if (!Sym->isDefined()) {
- getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
- getStreamer().EmitAssignment(Alias, Value);
- return false;
- }
-
- const MCObjectFileInfo::Environment Format =
- getContext().getObjectFileInfo()->getObjectFileType();
- switch (Format) {
- case MCObjectFileInfo::IsCOFF: {
- char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- getStreamer().EmitCOFFSymbolType(Type);
- // .set values are always local in COFF
- getStreamer().EmitSymbolAttribute(Alias, MCSA_Local);
- break;
- }
- case MCObjectFileInfo::IsELF:
- getStreamer().EmitSymbolAttribute(Alias, MCSA_ELF_TypeFunction);
- break;
- case MCObjectFileInfo::IsMachO:
- break;
- }
- }
-
- // FIXME: set the function as being a thumb function via the assembler
- getStreamer().EmitThumbFunc(Alias);
- getStreamer().EmitAssignment(Alias, Value);
-
+ getTargetStreamer().emitThumbSet(Alias, Value);
return false;
}
@@ -9365,8 +9488,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
int64_t Value;
if (!SOExpr->EvaluateAsAbsolute(Value))
return Match_Success;
- assert((Value >= INT32_MIN && Value <= INT32_MAX) &&
- "expression value must be representiable in 32 bits");
+ assert((Value >= INT32_MIN && Value <= UINT32_MAX) &&
+ "expression value must be representable in 32 bits");
}
break;
case MCK_GPRPair:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 8e14883..9b5fa75 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,7 +17,6 @@ add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
A15SDOptimizer.cpp
ARMAsmPrinter.cpp
- ARMAtomicExpandPass.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 9e40381..4d4038d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-disassembler"
-
#include "llvm/MC/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -29,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "arm-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
@@ -90,8 +90,8 @@ class ARMDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- ARMDisassembler(const MCSubtargetInfo &STI) :
- MCDisassembler(STI) {
+ ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
}
~ARMDisassembler() {
@@ -109,8 +109,8 @@ class ThumbDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- ThumbDisassembler(const MCSubtargetInfo &STI) :
- MCDisassembler(STI) {
+ ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
}
~ThumbDisassembler() {
@@ -400,12 +400,16 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new ARMDisassembler(STI);
+static MCDisassembler *createARMDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMDisassembler(STI, Ctx);
}
-static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new ThumbDisassembler(STI);
+static MCDisassembler *createThumbDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ThumbDisassembler(STI, Ctx);
}
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index da3fe01..e4b785d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -23,6 +22,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1db517f..7acd9cc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -306,8 +306,36 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
+static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
+ if (IsLittleEndian) {
+ // Note that the halfwords are stored high first and low second in Thumb,
+ // so we need to swap the fixup value here to map it properly.
+ uint32_t Swapped = (Value & 0xFFFF0000) >> 16;
+ Swapped |= (Value & 0x0000FFFF) << 16;
+ return Swapped;
+ }
+ else
+ return Value;
+}
+
+static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
+ bool IsLittleEndian) {
+ uint32_t Value;
+
+ if (IsLittleEndian) {
+ Value = (SecondHalf & 0xFFFF) << 16;
+ Value |= (FirstHalf & 0xFFFF);
+ } else {
+ Value = (SecondHalf & 0xFFFF);
+ Value |= (FirstHalf & 0xFFFF) << 16;
+ }
+
+ return Value;
+}
+
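A small self-contained sketch of what the two helpers compute on a little-endian target (the values are invented):

    #include <cassert>
    #include <cstdint>

    // Thumb2 stores a 32-bit instruction as two 16-bit halfwords, high half first,
    // so a little-endian writer swaps the halves of a full-word fixup value.
    static uint32_t swapHalfWords(uint32_t Value) {
      return (Value >> 16) | (Value << 16);
    }

    static uint32_t joinHalfWords(uint32_t First, uint32_t Second) {
      return ((Second & 0xFFFF) << 16) | (First & 0xFFFF); // little-endian layout
    }

    int main() {
      assert(joinHalfWords(0xF000, 0xD000) == 0xD000F000);
      assert(swapHalfWords(0x12345678) == 0x56781234);
      return 0;
    }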
static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx) {
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian) {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
@@ -316,6 +344,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_Data_2:
case FK_Data_4:
return Value;
+ case FK_SecRel_2:
+ return Value;
+ case FK_SecRel_4:
+ return Value;
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
@@ -342,9 +374,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
- uint64_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(Value, IsLittleEndian);
}
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
@@ -364,11 +394,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Same addressing mode as fixup_arm_pcrel_10,
// but with 16-bit halfwords swapped.
- if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
- uint64_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
- }
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12)
+ return swapHalfWords(Value, IsLittleEndian);
return Value;
}
@@ -401,9 +428,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x700) << 4;
out |= (Value & 0x0FF);
- uint64_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_condbranch:
@@ -434,9 +459,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x1FF800) << 5; // imm6 field
out |= (Value & 0x0007FF); // imm11 field
- uint64_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_t2_condbranch: {
Value = Value - 4;
@@ -449,9 +472,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x1F800) << 5; // imm6 field
out |= (Value & 0x007FF); // imm11 field
- uint32_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_thumb_bl: {
// The value doesn't encode the low bit (always zero) and is offset by
@@ -475,13 +496,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
uint32_t imm11Bits = (offset & 0x000007FF);
- uint32_t Binary = 0;
- uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
(uint16_t)imm11Bits);
- Binary |= secondHalf << 16;
- Binary |= firstHalf;
- return Binary;
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_blx: {
// The value doesn't encode the low two bits (always zero) and is offset by
@@ -508,13 +526,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
uint32_t imm10LBits = (offset & 0x3FF);
- uint32_t Binary = 0;
- uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
((uint16_t)imm10LBits) << 1);
- Binary |= secondHalf << 16;
- Binary |= firstHalf;
- return Binary;
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_cp:
// Offset by 4, and don't encode the low two bits. Two bytes of that
@@ -566,11 +581,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
// swapped.
- if (Kind == ARM::fixup_t2_pcrel_10) {
- uint32_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
- }
+ if (Kind == ARM::fixup_t2_pcrel_10)
+ return swapHalfWords(Value, IsLittleEndian);
return Value;
}
@@ -603,7 +615,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
- MCSymbolData &SymData = Asm.getSymbolData(Sym);
+ const MCSymbolData &SymData = Asm.getSymbolData(Sym);
IsResolved = (SymData.getFragment() == DF);
}
// We must always generate a relocation for BL/BLX instructions if we have
@@ -618,7 +630,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// Try to get the encoded value for the fixup as-if we're mapping it into
// the instruction. This allows adjustFixupValue() to issue a diagnostic
// if the value is invalid.
- (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext());
+ (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
+ IsLittleEndian);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -662,6 +675,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
return 4;
+
+ case FK_SecRel_2:
+ return 2;
+ case FK_SecRel_4:
+ return 4;
}
}
@@ -716,7 +734,7 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr);
+ Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
@@ -724,8 +742,11 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
// Used to point to big endian bytes.
unsigned FullSizeBytes;
- if (!IsLittleEndian)
+ if (!IsLittleEndian) {
FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
+ assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
+ }
// For each byte of the fragment that the fixup touches, mask in the bits from
// the fixup value. The Value has been "split up" into the appropriate
@@ -737,6 +758,15 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
namespace {
+// FIXME: This should be in a separate file.
+class ARMWinCOFFAsmBackend : public ARMAsmBackend {
+public:
+ ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple)
+ : ARMAsmBackend(T, Triple, true) { }
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
+ }
+};
// FIXME: This should be in a separate file.
// ELF is an ELF of course...
@@ -777,7 +807,9 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
bool isLittle) {
Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO()) {
+ switch (TheTriple.getObjectFormat()) {
+ default: llvm_unreachable("unsupported object format");
+ case Triple::MachO: {
MachO::CPUSubTypeARM CS =
StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
.Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
@@ -792,15 +824,14 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
return new DarwinARMAsmBackend(T, TT, CS);
}
-
-#if 0
- // FIXME: Introduce yet another checker but assert(0).
- if (TheTriple.isOSBinFormatCOFF())
- assert(0 && "Windows not supported on ARM");
-#endif
-
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
+ return new ARMWinCOFFAsmBackend(T, TT);
+ case Triple::ELF:
+ assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ }
}
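In effect the backend is now chosen purely by the triple's object format; for example (triples are illustrative), thumbv7-windows-msvc selects the new ARMWinCOFFAsmBackend, armv7-none-linux-gnueabi keeps the ELF backend, armv7-apple-darwin keeps the Darwin Mach-O backend, and any other object format hits the llvm_unreachable.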
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a4661b1..1c84263 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -74,7 +74,7 @@ unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant();
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type = 0;
if (IsPCRel) {
@@ -91,6 +91,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_ARM_TLS_IE32;
break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
}
break;
case ARM::fixup_arm_blx:
@@ -167,6 +170,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_ARM_GOTOFF32;
break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
case MCSymbolRefExpr::VK_ARM_TARGET1:
Type = ELF::R_ARM_TARGET1;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 5a01d26..a4d13ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -62,7 +63,7 @@ static const char *GetFPUName(unsigned ID) {
#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
#include "ARMFPUName.def"
}
- return NULL;
+ return nullptr;
}
static const char *GetArchName(unsigned ID) {
@@ -75,7 +76,7 @@ static const char *GetArchName(unsigned ID) {
#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
#include "ARMArchName.def"
}
- return NULL;
+ return nullptr;
}
static const char *GetArchDefaultCPUName(unsigned ID) {
@@ -88,7 +89,7 @@ static const char *GetArchDefaultCPUName(unsigned ID) {
#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
#include "ARMArchName.def"
}
- return NULL;
+ return nullptr;
}
static unsigned GetArchDefaultCPUArch(unsigned ID) {
@@ -139,6 +140,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void finishAttributeSection() override;
void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
public:
ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS,
@@ -260,6 +262,10 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
}
+void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n';
+}
+
void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
OS << "\t.inst";
if (Suffix)
@@ -310,7 +316,7 @@ private:
for (size_t i = 0; i < Contents.size(); ++i)
if (Contents[i].Tag == Attribute)
return &Contents[i];
- return 0;
+ return nullptr;
}
void setAttributeItem(unsigned Attribute, unsigned Value,
@@ -406,8 +412,10 @@ private:
void emitFPU(unsigned FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
+ void emitLabel(MCSymbol *Symbol) override;
void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
size_t calculateContentSize() const;
@@ -415,7 +423,7 @@ public:
ARMTargetELFStreamer(MCStreamer &S)
: ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH),
- AttributeSection(0) {}
+ AttributeSection(nullptr) {}
};
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -531,7 +539,8 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override {
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
}
@@ -600,12 +609,8 @@ private:
}
void EmitThumbFunc(MCSymbol *Func) override {
- // FIXME: Anything needed here to flag the function as thumb?
-
getAssembler().setIsThumbFunc(Func);
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
- SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
}
// Helper functions for ARM exception handling directives
@@ -980,10 +985,35 @@ void ARMTargetELFStreamer::finishAttributeSection() {
Contents.clear();
FPU = ARM::INVALID_FPU;
}
+
+void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
+ ARMELFStreamer &Streamer = getStreamer();
+ if (!Streamer.IsThumb)
+ return;
+
+ const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
+ if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+ Streamer.EmitThumbFunc(Symbol);
+}
+
void
ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
getStreamer().EmitFixup(S, FK_Data_4);
}
+
+void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
+ const MCSymbol &Sym = SRE->getSymbol();
+ if (!Sym.isDefined()) {
+ getStreamer().EmitAssignment(Symbol, Value);
+ return;
+ }
+ }
+
+ getStreamer().EmitThumbFunc(Symbol);
+ getStreamer().EmitAssignment(Symbol, Value);
+}
+
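A usage sketch of the directive this implements (the assembly is illustrative):

    .thumb
    fn:
        bx lr
    .thumb_set alias, fn       @ fn is already defined: alias is marked as a Thumb function and assigned
    .thumb_set early, later    @ later is not yet defined: only the assignment is emitted, as in the code above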
void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
getStreamer().emitInst(Inst, Suffix);
}
@@ -1012,7 +1042,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
// Get .ARM.extab or .ARM.exidx section
- const MCSectionELF *EHSection = NULL;
+ const MCSectionELF *EHSection = nullptr;
if (const MCSymbol *Group = FnSection.getGroup()) {
EHSection = getContext().getELFSection(
EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
@@ -1049,9 +1079,9 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
}
void ARMELFStreamer::Reset() {
- ExTab = NULL;
- FnStart = NULL;
- Personality = NULL;
+ ExTab = nullptr;
+ FnStart = nullptr;
+ Personality = nullptr;
PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX;
FPReg = ARM::SP;
FPOffset = 0;
@@ -1065,7 +1095,7 @@ void ARMELFStreamer::Reset() {
}
void ARMELFStreamer::emitFnStart() {
- assert(FnStart == 0);
+ assert(FnStart == nullptr);
FnStart = getContext().CreateTempSymbol();
EmitLabel(FnStart);
}
@@ -1104,11 +1134,14 @@ void ARMELFStreamer::emitFnEnd() {
// the second word of exception index table entry. The size of the unwind
// opcodes should always be 4 bytes.
assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 &&
- "Compact model must use __aeabi_cpp_unwind_pr0 as personality");
+ "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
assert(Opcodes.size() == 4u &&
- "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
- EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()),
- Opcodes.size()));
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+ uint64_t Intval = Opcodes[0] |
+ Opcodes[1] << 8 |
+ Opcodes[2] << 16 |
+ Opcodes[3] << 24;
+ EmitIntValue(Intval, Opcodes.size());
}
// Switch to the section containing FnStart
@@ -1180,8 +1213,15 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
}
// Emit unwind opcodes
- EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()),
- Opcodes.size()));
+ assert((Opcodes.size() % 4) == 0 &&
+ "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be multiple of 4");
+ for (unsigned I = 0; I != Opcodes.size(); I += 4) {
+ uint64_t Intval = Opcodes[I] |
+ Opcodes[I + 1] << 8 |
+ Opcodes[I + 2] << 16 |
+ Opcodes[I + 3] << 24;
+ EmitIntValue(Intval, 4);
+ }
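A minimal sketch of the packing done by the loop above; emitting whole 32-bit words (rather than raw bytes) lets the streamer apply the target's byte order, which matters for big-endian output:

    #include <cstdint>

    // Pack four unwind opcode bytes into one word, least significant byte first,
    // mirroring the loop above.
    static uint32_t packUnwindWord(const uint8_t Op[4]) {
      return uint32_t(Op[0]) | (uint32_t(Op[1]) << 8) |
             (uint32_t(Op[2]) << 16) | (uint32_t(Op[3]) << 24);
    }
    // e.g. four 0xB0 ("finish") opcodes pack to 0xB0B0B0B0.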
// According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or
// __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted
@@ -1283,13 +1323,11 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI,
- bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
return S;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index b7f96e0..7a19208 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -25,7 +25,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
(TheTriple.getArch() == Triple::thumbeb))
IsLittleEndian = false;
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
@@ -50,7 +50,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
@@ -59,7 +59,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::ARM;
+ switch (TheTriple.getOS()) {
+ case Triple::NetBSD:
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ break;
+ default:
+ ExceptionsType = ExceptionHandling::ARM;
+ break;
+ }
// foo(plt) instead of foo@plt
UseParensForSymbolVariant = true;
@@ -89,6 +96,7 @@ void ARMCOFFMCAsmInfoGNU::anchor() { }
ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
AlignmentIsInBytes = false;
+ HasSingleParameterDotFile = true;
CommentString = "@";
Code16Directive = ".code\t16";
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index beaf6a4..51cfa0a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -35,13 +35,13 @@ namespace llvm {
};
class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor();
+ void anchor() override;
public:
explicit ARMCOFFMCAsmInfoMicrosoft();
};
class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
- void anchor();
+ void anchor() override;
public:
explicit ARMCOFFMCAsmInfoGNU();
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5564e0a..5b51a52 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -31,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
@@ -1036,16 +1037,17 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
: ARM::fixup_arm_movw_lo16);
break;
}
+
Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
}
// If the expression doesn't have :upper16: or :lower16: on it,
- // it's just a plain immediate expression, and those evaluate to
+ // it's just a plain immediate expression; previously those evaluated to
// the lower 16 bits of the expression regardless of whether
- // we have a movt or a movw.
- Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16
- : ARM::fixup_arm_movw_lo16);
- Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ // we have a movt or a movw, but that led to misleading results.
+ // This is now disallowed in the AsmParser in validateInstruction(),
+ // so this should never happen.
+ assert(0 && "expression without :upper16: or :lower16:");
return 0;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index fc8505b..87ea875 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "armmcexpr"
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;
+#define DEBUG_TYPE "armmcexpr"
+
const ARMMCExpr*
ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 949a3d5..04d63a7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -106,9 +107,11 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
unsigned SubVer = TT[Idx];
if (SubVer == '8') {
if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP,
- // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc";
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
else
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
@@ -245,7 +248,7 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
}
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
- MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(0, Reg, 0));
+ MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
return MAI;
}
@@ -273,18 +276,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO()) {
+ switch (TheTriple.getObjectFormat()) {
+ default: llvm_unreachable("unsupported object format");
+ case Triple::MachO: {
MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
new ARMTargetStreamer(*S);
return S;
}
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("ARM does not support Windows COFF format");
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
+ return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
+ case Triple::ELF:
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
}
-
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
- TheTriple.getArch() == Triple::thumb);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
@@ -295,7 +300,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new ARMInstPrinter(MAI, MII, MRI, STI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index e81876f..8853a8c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -47,8 +47,7 @@ namespace ARM_MC {
}
MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI,
- bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
@@ -78,6 +77,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
+/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
+/// will generate a PE/COFF object file.
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &Emitter, raw_ostream &OS);
+
/// createARMELFObjectWriter - Construct an ARM ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
@@ -89,6 +93,8 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 3bf5cf1..ecfa4e5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -156,7 +156,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -170,7 +170,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
FixedValue += SecAddr;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -206,11 +206,11 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// The thumb bit shouldn't be set in the 'other-half' bit of the
// relocation, but it will be set in FixedValue if the base symbol
// is a thumb function. Clear it out here.
- if (A_SD->getFlags() & SF_ThumbFunc)
+ if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
- if (A_SD->getFlags() & SF_ThumbFunc)
+ if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
@@ -259,7 +259,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -272,7 +272,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -378,7 +378,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Get the symbol data, if any.
- MCSymbolData *SD = 0;
+ const MCSymbolData *SD = nullptr;
if (Target.getSymA())
SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index fdc0ed7..e3cfb05 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -109,7 +109,7 @@ ConstantPool *
AssemblerConstantPools::getConstantPool(const MCSection *Section) {
ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
if (CP == ConstantPools.end())
- return 0;
+ return nullptr;
return &CP->second;
}
@@ -246,3 +246,7 @@ void ARMTargetStreamer::AnnotateTLSDescriptorSequence(
const MCSymbolRefExpr *SRE) {
llvm_unreachable("unimplemented");
}
+
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("unimplemented");
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
new file mode 100644
index 0000000..d31f1f4
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -0,0 +1,82 @@
+//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ ARMWinCOFFObjectWriter(bool Is64Bit)
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
+ assert(!Is64Bit && "AArch64 support not yet implemented");
+ }
+ virtual ~ARMWinCOFFObjectWriter() { }
+
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection) const {
+ assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ "AArch64 support not yet implemented");
+
+ MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ switch (static_cast<unsigned>(Fixup.getKind())) {
+ default: llvm_unreachable("unsupported relocation type");
+ case FK_Data_4:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_COFF_IMGREL32:
+ return COFF::IMAGE_REL_ARM_ADDR32NB;
+ case MCSymbolRefExpr::VK_SECREL:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ default:
+ return COFF::IMAGE_REL_ARM_ADDR32;
+ }
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_ARM_SECTION;
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ case ARM::fixup_t2_condbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH20T;
+ case ARM::fixup_t2_uncondbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ return COFF::IMAGE_REL_ARM_BLX23T;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16:
+ return COFF::IMAGE_REL_ARM_MOV32T;
+ }
+}
+
+bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
+}
+}
+
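One reading of recordRelocation() above (hedged, not stated in the patch): IMAGE_REL_ARM_MOV32T describes a movw/movt pair as a single unit, so the relocation is recorded against the low-half fixup and the movt half is suppressed to avoid a duplicate. Illustratively, with an invented symbol:

    movw    r0, :lower16:extsym    @ fixup_t2_movw_lo16: records IMAGE_REL_ARM_MOV32T
    movt    r0, :upper16:extsym    @ fixup_t2_movt_hi16: no separate relocation recorded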
+namespace llvm {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
new file mode 100644
index 0000000..b344ced
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -0,0 +1,46 @@
+//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_ostream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) { }
+
+ void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
+ void EmitThumbFunc(MCSymbol *Symbol) override;
+};
+
+void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: llvm_unreachable("not implemented");
+ case MCAF_SyntaxUnified:
+ case MCAF_Code16:
+ break;
+ }
+}
+
+void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ getAssembler().setIsThumbFunc(Symbol);
+}
+}
+
+namespace llvm {
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &Emitter, raw_ostream &OS) {
+ return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
+}
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/Android.mk b/lib/Target/ARM/MCTargetDesc/Android.mk
index 074d29e..a5827f7 100644
--- a/lib/Target/ARM/MCTargetDesc/Android.mk
+++ b/lib/Target/ARM/MCTargetDesc/Android.mk
@@ -17,7 +17,9 @@ arm_mc_desc_SRC_FILES := \
ARMMachObjectWriter.cpp \
ARMMachORelocationInfo.cpp \
ARMTargetStreamer.cpp \
- ARMUnwindOpAsm.cpp
+ ARMUnwindOpAsm.cpp \
+ ARMWinCOFFObjectWriter.cpp \
+ ARMWinCOFFStreamer.cpp \
# For the host
# =====================================================
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 06812d4..9582e8c 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,14 +1,16 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
+ ARMELFObjectWriter.cpp
ARMELFStreamer.cpp
+ ARMMachObjectWriter.cpp
+ ARMMachORelocationInfo.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMMCTargetDesc.cpp
- ARMMachObjectWriter.cpp
- ARMELFObjectWriter.cpp
ARMTargetStreamer.cpp
ARMUnwindOpAsm.cpp
- ARMMachORelocationInfo.cpp
+ ARMWinCOFFObjectWriter.cpp
+ ARMWinCOFFStreamer.cpp
)
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 80af859..f6d24e9 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mlx-expansion"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "mlx-expansion"
+
static cl::opt<bool>
ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
static cl::opt<unsigned>
@@ -73,7 +74,7 @@ namespace {
}
void MLxExpansion::clearStack() {
- std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ std::fill(LastMIs, LastMIs + 4, nullptr);
MIIdx = 0;
}
@@ -88,7 +89,7 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
// real definition MI. This is important for _sfp instructions.
unsigned Reg = MI->getOperand(1).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
+ return nullptr;
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *DefMI = MRI->getVRegDef(Reg);
@@ -352,7 +353,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
if (Domain == ARMII::DomainGeneral) {
if (++Skip == 2)
// Assume dual issues of non-VFP / NEON instructions.
- pushStack(0);
+ pushStack(nullptr);
} else {
Skip = 0;
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index a64707e..f4d9be3 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code.
//===---------------------------------------------------------------------===//
-Add ldmia, stmia support.
-
-//===---------------------------------------------------------------------===//
-
Thumb load / store address mode offsets are scaled. The values kept in the
instruction operands are pre-scale values. This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2224652..be29dc5 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -293,7 +293,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
+static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
@@ -328,7 +328,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 93e2b5a..0c0abbe 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -56,7 +56,7 @@ public:
unsigned Reg) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
};
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 406dbe0..edb9ff3 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "thumb2-it"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
@@ -19,6 +18,8 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;
+#define DEBUG_TYPE "thumb2-it"
+
STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 04b83fb..6267ecf 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
@@ -25,6 +24,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "t2-reduce-size"
+
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
@@ -915,15 +916,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
- MachineInstr *BundleMI = 0;
+ MachineInstr *BundleMI = nullptr;
- CPSRDef = 0;
+ CPSRDef = nullptr;
HighLatencyCPSR = false;
// Check predecessors for the latest CPSRDef.
- for (MachineBasicBlock::pred_iterator
- I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
- const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ for (auto *Pred : MBB.predecessors()) {
+ const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
if (!PInfo.Visited) {
// Since blocks are visited in RPO, this must be a back-edge.
continue;
@@ -984,7 +984,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
if (MI->isCall()) {
// Calls don't really set CPSR.
- CPSRDef = 0;
+ CPSRDef = nullptr;
HighLatencyCPSR = false;
IsSelfLoop = false;
} else if (DefCPSR) {