aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/ARM/ARM.h | 2
-rw-r--r-- lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 40
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 12
-rw-r--r-- lib/Target/ARM/ARMCallingConv.h | 20
-rw-r--r-- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 44
-rw-r--r-- lib/Target/ARM/ARMFastISel.cpp | 71
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 15
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 95
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 3
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.h | 4
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 124
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td | 100
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 69
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td | 85
-rw-r--r-- lib/Target/ARM/ARMJITInfo.cpp | 4
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.h | 3
-rw-r--r-- lib/Target/ARM/ARMRegisterInfo.td | 20
-rw-r--r-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 53
-rw-r--r-- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 118
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 42
-rw-r--r-- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 7
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 11
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 46
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 80
-rw-r--r-- lib/Target/ARM/Thumb1FrameLowering.cpp | 1
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp | 1
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.h | 4
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.cpp | 3
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.h | 3
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp | 1
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.h | 2
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.cpp | 4
-rw-r--r-- lib/Target/ARM/Thumb2RegisterInfo.h | 3
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp | 8
-rw-r--r-- lib/Target/CBackend/CBackend.cpp | 10
-rw-r--r-- lib/Target/CellSPU/SPUFrameLowering.cpp | 2
-rw-r--r-- lib/Target/CellSPU/SPUISelLowering.cpp | 70
-rw-r--r-- lib/Target/CellSPU/SPUISelLowering.h | 2
-rw-r--r-- lib/Target/CellSPU/SPUInstrInfo.h | 2
-rw-r--r-- lib/Target/CellSPU/SPURegisterInfo.cpp | 2
-rw-r--r-- lib/Target/CellSPU/SPUTargetMachine.cpp | 2
-rw-r--r-- lib/Target/CellSPU/SPUTargetMachine.h | 3
-rw-r--r-- lib/Target/CppBackend/CPPBackend.cpp | 8
-rw-r--r-- lib/Target/Hexagon/Hexagon.h | 1
-rw-r--r-- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4
-rw-r--r-- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonFrameLowering.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 4
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.cpp | 11
-rw-r--r-- lib/Target/Hexagon/HexagonISelLowering.h | 2
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonInstrInfo.h | 2
-rw-r--r-- lib/Target/Hexagon/HexagonPeephole.cpp | 7
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonRegisterInfo.h | 3
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.cpp | 2
-rw-r--r-- lib/Target/Hexagon/HexagonTargetMachine.h | 9
-rw-r--r-- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 6
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 4
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 6
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r-- lib/Target/Hexagon/MCTargetDesc/Makefile | 2
-rw-r--r-- lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 4
-rw-r--r-- lib/Target/MBlaze/MBlazeFrameLowering.h | 3
-rw-r--r-- lib/Target/MBlaze/MBlazeISelLowering.cpp | 2
-rw-r--r-- lib/Target/MBlaze/MBlazeISelLowering.h | 4
-rw-r--r-- lib/Target/MBlaze/MBlazeInstrInfo.h | 2
-rw-r--r-- lib/Target/MBlaze/MBlazeMCInstLower.h | 1
-rw-r--r-- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.cpp | 2
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.h | 2
-rw-r--r-- lib/Target/MSP430/MSP430MCInstLower.h | 1
-rw-r--r-- lib/Target/MSP430/MSP430RegisterInfo.cpp | 2
-rw-r--r-- lib/Target/MSP430/MSP430Subtarget.h | 3
-rw-r--r-- lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 37
-rw-r--r-- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 6
-rw-r--r-- lib/Target/Mips/Mips.h | 2
-rw-r--r-- lib/Target/Mips/MipsAnalyzeImmediate.cpp | 20
-rw-r--r-- lib/Target/Mips/MipsAnalyzeImmediate.h | 10
-rw-r--r-- lib/Target/Mips/MipsAsmPrinter.cpp | 4
-rw-r--r-- lib/Target/Mips/MipsAsmPrinter.h | 2
-rw-r--r-- lib/Target/Mips/MipsFrameLowering.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsISelDAGToDAG.cpp | 51
-rw-r--r-- lib/Target/Mips/MipsISelLowering.cpp | 104
-rw-r--r-- lib/Target/Mips/MipsISelLowering.h | 5
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.h | 2
-rw-r--r-- lib/Target/Mips/MipsMCInstLower.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsMCInstLower.h | 4
-rw-r--r-- lib/Target/Mips/MipsMachineFunction.h | 2
-rw-r--r-- lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r-- lib/Target/Mips/MipsTargetMachine.h | 6
-rw-r--r-- lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h | 2
-rw-r--r-- lib/Target/PTX/PTX.h | 1
-rw-r--r-- lib/Target/PTX/PTXAsmPrinter.cpp | 2
-rw-r--r-- lib/Target/PTX/PTXISelLowering.cpp | 2
-rw-r--r-- lib/Target/PTX/PTXISelLowering.h | 2
-rw-r--r-- lib/Target/PTX/PTXInstrInfo.cpp | 2
-rw-r--r-- lib/Target/PTX/PTXParamManager.cpp | 2
-rw-r--r-- lib/Target/PTX/PTXParamManager.h | 1
-rw-r--r-- lib/Target/PTX/PTXRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/PTX/PTXTargetMachine.cpp | 5
-rw-r--r-- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPC.h | 5
-rw-r--r-- lib/Target/PowerPC/PPCAsmPrinter.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCCallingConv.td | 31
-rw-r--r-- lib/Target/PowerPC/PPCFrameLowering.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCHazardRecognizers.h | 2
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 52
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h | 4
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td | 16
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td | 16
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 106
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.h | 1
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCTargetMachine.h | 2
-rw-r--r-- lib/Target/Sparc/FPMover.cpp | 8
-rw-r--r-- lib/Target/Sparc/SparcISelLowering.cpp | 6
-rw-r--r-- lib/Target/Sparc/SparcISelLowering.h | 2
-rw-r--r-- lib/Target/Sparc/SparcInstrInfo.h | 2
-rw-r--r-- lib/Target/Sparc/SparcRegisterInfo.cpp | 4
-rw-r--r-- lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParser.cpp | 32
-rw-r--r-- lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 9
-rw-r--r-- lib/Target/X86/InstPrinter/X86InstComments.cpp | 8
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 2
-rw-r--r-- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 58
-rw-r--r-- lib/Target/X86/README-SSE.txt | 19
-rw-r--r-- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 26
-rw-r--r-- lib/Target/X86/Utils/X86ShuffleDecode.h | 28
-rw-r--r-- lib/Target/X86/X86.h | 2
-rw-r--r-- lib/Target/X86/X86AsmPrinter.cpp | 4
-rw-r--r-- lib/Target/X86/X86AsmPrinter.h | 4
-rw-r--r-- lib/Target/X86/X86COFFMachineModuleInfo.h | 2
-rw-r--r-- lib/Target/X86/X86FastISel.cpp | 2
-rw-r--r-- lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 4
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 399
-rw-r--r-- lib/Target/X86/X86InstrCompiler.td | 107
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td | 5
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 50
-rw-r--r-- lib/Target/X86/X86InstrInfo.h | 2
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 26
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 94
-rw-r--r-- lib/Target/X86/X86InstrSystem.td | 10
-rw-r--r-- lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r-- lib/Target/X86/X86RegisterInfo.cpp | 2
-rw-r--r-- lib/Target/X86/X86Schedule.td | 11
-rw-r--r-- lib/Target/X86/X86ScheduleAtom.td | 15
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 2
-rw-r--r-- lib/Target/X86/X86TargetMachine.h | 1
-rw-r--r-- lib/Target/X86/X86TargetObjectFile.h | 1
-rw-r--r-- lib/Target/XCore/XCoreFrameLowering.cpp | 2
-rw-r--r-- lib/Target/XCore/XCoreISelLowering.cpp | 2
-rw-r--r-- lib/Target/XCore/XCoreISelLowering.h | 2
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.cpp | 2
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.h | 2
-rw-r--r-- lib/Target/XCore/XCoreRegisterInfo.cpp | 18
-rw-r--r-- lib/Target/XCore/XCoreRegisterInfo.h | 9
-rw-r--r-- lib/Target/XCore/XCoreTargetMachine.h | 4
167 files changed, 1592 insertions, 1255 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index acb57f7..2a1e8e4 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -18,9 +18,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4ec19cc..ca30716 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARM.h"
#include "ARMBuildAttrs.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 75b796e..366e2fa 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -935,6 +935,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -953,6 +955,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -2756,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8PseudoWB_fixed:
- case ARM::VLD1DUPq16PseudoWB_fixed:
- case ARM::VLD1DUPq32PseudoWB_fixed:
- case ARM::VLD1DUPq8PseudoWB_register:
- case ARM::VLD1DUPq16PseudoWB_register:
- case ARM::VLD1DUPq32PseudoWB_register:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8PseudoWB_fixed:
- case ARM::VLD2DUPd16PseudoWB_fixed:
- case ARM::VLD2DUPd32PseudoWB_fixed:
- case ARM::VLD2DUPd8PseudoWB_register:
- case ARM::VLD2DUPd16PseudoWB_register:
- case ARM::VLD2DUPd32PseudoWB_register:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d2aff9a..291369f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
@@ -79,6 +79,7 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
+ Reserved.set(ARM::FPSCR);
if (TFI->hasFP(MF))
Reserved.set(FramePtr);
if (hasBasePointer(MF))
@@ -492,8 +493,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// When outgoing call frames are so large that we adjust the stack pointer
// around the call, we can no longer use the stack pointer to reach the
// emergency spill slot.
- if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() ||
- !TFI->hasReservedCallFrame(MF)))
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -517,7 +517,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't realign the stack if:
@@ -532,8 +531,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// register allocation with frame pointer elimination, it is too late now.
if (!MRI->canReserveReg(FramePtr))
return false;
- // We may also need a base pointer if there are dynamic allocas.
- if (!MFI->hasVarSizedObjects())
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
return true;
if (!EnableBasePointer)
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 437b4c7..2b9c55d 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -15,13 +15,13 @@
#ifndef ARMCALLINGCONV_H
#define ARMCALLINGCONV_H
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
@@ -29,7 +29,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +72,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -118,8 +118,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c4ab99d..c2b7816 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -99,8 +99,8 @@ namespace {
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
- unsigned PseudoOpc;
- unsigned RealOpc;
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
bool IsLoad;
bool isUpdating;
bool hasWritebackOperand;
@@ -129,16 +129,6 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
-
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
@@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
-{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false},
-
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
@@ -345,7 +325,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- unsigned NumEntries = array_lengthof(NEONLdStTable);
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
@@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8PseudoWB_fixed:
- case ARM::VLD1DUPq16PseudoWB_fixed:
- case ARM::VLD1DUPq32PseudoWB_fixed:
- case ARM::VLD1DUPq8PseudoWB_register:
- case ARM::VLD1DUPq16PseudoWB_register:
- case ARM::VLD1DUPq32PseudoWB_register:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8PseudoWB_fixed:
- case ARM::VLD2DUPd16PseudoWB_fixed:
- case ARM::VLD2DUPd32PseudoWB_fixed:
- case ARM::VLD2DUPd8PseudoWB_register:
- case ARM::VLD2DUPd16PseudoWB_register:
- case ARM::VLD2DUPd32PseudoWB_register:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 818b202..a24eab4 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1384,7 +1384,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
SrcVT == MVT::i1) {
const APInt &CIVal = ConstInt->getValue();
Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
- if (Imm < 0) {
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // than a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
isNegativeImm = true;
Imm = -Imm;
}
@@ -1475,7 +1478,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool ARMFastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
- Type *Ty = CI->getOperand(0)->getType();
// Get the compare predicate.
ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
@@ -1495,11 +1497,10 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
unsigned DestReg = createResultReg(RC);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
- bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
- unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
+ // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
- .addImm(ARMPred).addReg(CondReg);
+ .addImm(ARMPred).addReg(ARM::CPSR);
UpdateValueMap(I, DestReg);
return true;
@@ -1851,6 +1852,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At this point, we are able to handle the call's arguments in fast isel.
+
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
@@ -1866,9 +1909,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON/vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
- return false;
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
@@ -1908,12 +1950,13 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
- if (VA.getLocVT() != MVT::f64) return false;
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
CCValAssign &NextVA = ArgLocs[++i];
- // TODO: Only handle register args for now.
- if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1929,9 +1972,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
Addr.Base.Reg = ARM::SP;
Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
}
}
+
return true;
}
@@ -2136,7 +2181,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
- if(isThumb2)
+ if (isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0fd6025..bd4b2a9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -501,7 +501,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
// SP can move around if there are allocas. We may also lose track of SP
// when emergency spilling inside a non-reserved call frame setup.
- bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF);
+ bool hasMovingSP = !hasReservedCallFrame(MF);
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c99db98..ffb9acb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
- case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register;
- case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register;
- case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register;
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
- ARM::VLD2DUPd32Pseudo };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
@@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed,
- ARM::VLD2DUPd16PseudoWB_fixed,
- ARM::VLD2DUPd32PseudoWB_fixed };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 477b5f4..e26dd22 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
@@ -49,7 +49,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <sstream>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -87,7 +86,7 @@ namespace {
}
// The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -456,6 +455,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -3673,6 +3674,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal == -1)
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+}
+
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
/// operand (e.g., VMOV). If so, return the encoded value.
@@ -5109,6 +5131,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -6842,33 +6865,63 @@ static SDValue PerformMULCombine(SDNode *N,
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
+ int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
DebugLoc DL = N->getDebugLoc();
SDValue Res;
MulAmt >>= ShiftAmt;
- if (isPowerOf2_32(MulAmt - 1)) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- Res = DAG.getNode(ISD::ADD, DL, VT,
- V, DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt-1),
- MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt+1),
- MVT::i32)),
- V);
- } else
- return SDValue();
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32),Res);
+
+ } else
+ return SDValue();
+ }
if (ShiftAmt != 0)
- Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7f12293..a71b74e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
+#include "ARM.h"
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -434,6 +435,8 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 7bedf30..72af535 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -14,11 +14,11 @@
#ifndef ARMINSTRUCTIONINFO_H
#define ARMINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 0b1406e..8196582 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -637,6 +637,7 @@ def BitfieldAsmOperand : AsmOperandClass {
let Name = "Bitfield";
let ParserMethod = "parseBitfield";
}
+
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -4084,74 +4085,43 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
-let isCodeGenOnly = 1 in {
// Conditional instructions
-multiclass AsI1_bincc_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-0} = imm;
- }
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b00000000;
- let Inst{3-0} = Rm;
- }
-
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-5} = shift{11-5};
- let Inst{4} = 0;
- let Inst{3-0} = shift{3-0};
- }
-
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift", []>,
- RegConstraint<"$Rn = $Rd"> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-8} = shift{11-8};
- let Inst{7} = 0;
- let Inst{6-5} = shift{6-5};
- let Inst{4} = 1;
- let Inst{3-0} = shift{3-0};
- }
-} // AsI1_bincc_irs
-
-defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+ Instruction irsr,
+ InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis> {
+ def ri : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rr : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-} // isCodeGenOnly
} // neverHasSideEffects
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -4605,10 +4575,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4642,10 +4618,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
@@ -5252,6 +5234,20 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
(RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need difference constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8684ce1..f61eb2b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
@@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> {
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
@@ -153,23 +153,24 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
- let Name = "VecListTwoDAllLanes";
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
- let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
- let Name = "VecListTwoQAllLanes";
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListTwoQAllLanes : RegisterOperand<DPR,
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
"printVectorListTwoSpacedAllLanes"> {
- let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
@@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
- let Pattern = [(set QPR:$dst,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
-def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
- (VLD1DUPq32Pseudo addrmode6:$addr)>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-
-class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "$Vd, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
- (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
@@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
let AsmMatchConverter = "cvtVLDwbFixed";
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
- (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
(ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
@@ -1378,18 +1365,14 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
-
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
@@ -1414,20 +1397,13 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
}
-defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>;
-defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>;
-
-defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>;
-defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e8984e1..1f7edc1 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -574,7 +574,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
- def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"),
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
(!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
@@ -2952,45 +2952,36 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
-multiclass T2I_bincc_irs<bits<4> opcod, string opc,
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
// shifted imm
- def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- iii, opc, ".w\t$Rd, $Rn, $imm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{15} = 0;
- }
+ def ri : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
// register
- def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- iir, opc, ".w\t$Rd, $Rn, $Rm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
- }
+ def rr : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
// shifted register
- def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd),
- (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>,
- RegConstraint<"$Rn = $Rd"> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- }
+ def rs : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
} // T2I_bincc_irs
-defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-
-} // isCodeGenOnly = 1
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3768,20 +3759,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aa10af7..e9d5720 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -206,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -286,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -315,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
@@ -335,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -376,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -810,7 +818,29 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -818,7 +848,7 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -826,7 +856,7 @@ def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -834,7 +864,7 @@ def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -842,25 +872,25 @@ def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
// Fixed-Point to FP:
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -868,7 +898,7 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -876,7 +906,7 @@ def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
let D = VFPNeonA8Domain;
}
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -884,7 +914,7 @@ def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
@@ -892,19 +922,19 @@ def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
let D = VFPNeonA8Domain;
}
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
@@ -1166,9 +1196,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
// to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1182,6 +1212,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, fpexc", []>;
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
}
//===----------------------------------------------------------------------===//
@@ -1304,6 +1338,13 @@ def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
+
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index afbe0e4..753e578 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -62,7 +62,7 @@ extern "C" {
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
@@ -86,7 +86,7 @@ extern "C" {
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1f83762..6f3819a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -11,9 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
using namespace llvm;
void ARMRegisterInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 65ed95d..8a24842 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -15,13 +15,12 @@
#define ARMREGISTERINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b16a12c..1327fb8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,14 +153,21 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
// Register classes.
@@ -304,7 +311,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
// Register class representing a pair of consecutive D registers.
// Use the Q registers for the even-odd pairs.
-def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> {
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
// Allocate starting at non-VFP2 registers D16-D31 first.
let AltOrders = [(rotl DPair, 16)];
let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2045482..911eb13 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1101,13 +1101,8 @@ public:
return VectorList.Count == 4;
}
- bool isVecListTwoQ() const {
- if (!isDoubleSpacedVectorList()) return false;
- return VectorList.Count == 2;
- }
-
bool isVecListDPairSpaced() const {
- if (!isSingleSpacedVectorList()) return false;
+ if (isSingleSpacedVectorList()) return false;
return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
.contains(VectorList.RegNum));
}
@@ -1133,12 +1128,13 @@ public:
return VectorList.Count == 1;
}
- bool isVecListTwoDAllLanes() const {
+ bool isVecListDPairAllLanes() const {
if (!isSingleSpacedVectorAllLanes()) return false;
- return VectorList.Count == 2;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
}
- bool isVecListTwoQAllLanes() const {
+ bool isVecListDPairSpacedAllLanes() const {
if (!isDoubleSpacedVectorAllLanes()) return false;
return VectorList.Count == 2;
}
@@ -2858,8 +2854,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
- return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
Warning(RegLoc, "duplicated register (" + RegTok.getString() +
") in register list");
@@ -2905,6 +2905,12 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
Parser.Lex(); // Eat the ']'.
return MatchOperand_Success;
}
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assembly puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
if (getParser().ParseExpression(LaneIndex)) {
@@ -2981,12 +2987,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
case NoLanes:
E = Parser.getTok().getLoc();
Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
- &ARMMCRegisterClasses[ARM::DPairRegClassID]);
-
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
break;
case AllLanes:
E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
S, E));
break;
@@ -3152,7 +3159,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (LaneKind) {
case NoLanes:
- // Non-lane two-register operands have been converted to the
+ // Two-register operands have been converted to the
// composite register classes.
if (Count == 2) {
const MCRegisterClass *RC = (Spacing == 1) ?
@@ -3165,6 +3172,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
(Spacing == 2), S, E));
break;
case AllLanes:
+ // Two-register operands have been converted to the
+ // composite register classes.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
(Spacing == 2),
S, E));
@@ -3253,7 +3268,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMClass()) {
// See ARMv6-M 10.1.1
- unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
.Case("apsr", 0)
.Case("iapsr", 1)
.Case("eapsr", 2)
@@ -4427,10 +4443,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
else if (Res == -1) // irrecoverable error
return true;
// If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
S = Parser.getTok().getLoc();
Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
return false;
}
@@ -4598,7 +4615,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
- Mnemonic == "fmuls" || Mnemonic == "fcmps" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4101f59..ce4587b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -849,7 +849,7 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10, ARM::R11,
@@ -869,8 +869,14 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
}
static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
@@ -916,7 +922,7 @@ static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -937,7 +943,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
@@ -973,7 +979,7 @@ DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
@@ -992,7 +998,7 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPairDecoderTable[] = {
+static const uint16_t DPairDecoderTable[] = {
ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
@@ -1011,7 +1017,7 @@ static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPairSpacedDecoderTable[] = {
+static const uint16_t DPairSpacedDecoderTable[] = {
ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
@@ -2001,27 +2007,15 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// First output register
switch (Inst.getOpcode()) {
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD1q8:
- case ARM::VLD1q16wb_fixed:
- case ARM::VLD1q16wb_register:
- case ARM::VLD1q32wb_fixed:
- case ARM::VLD1q32wb_register:
- case ARM::VLD1q64wb_fixed:
- case ARM::VLD1q64wb_register:
- case ARM::VLD1q8wb_fixed:
- case ARM::VLD1q8wb_register:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
- case ARM::VLD2d8:
- case ARM::VLD2d16wb_fixed:
- case ARM::VLD2d16wb_register:
- case ARM::VLD2d32wb_fixed:
- case ARM::VLD2d32wb_register:
- case ARM::VLD2d8wb_fixed:
- case ARM::VLD2d8wb_register:
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
break;
@@ -2325,6 +2319,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST2b8wb_register:
case ARM::VST2b16wb_register:
case ARM::VST2b32wb_register:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2366,6 +2362,23 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_fixed:
case ARM::VST1q32wb_fixed:
case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
break;
}
@@ -2525,8 +2538,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
align *= (1 << size);
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2556,18 +2580,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size;
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
@@ -2579,6 +2618,9 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
return S;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bae4e78..2b994df 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1026,15 +1026,6 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}";
-}
-
-void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1042,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
}
-void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1081,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
}
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1111,23 +1101,13 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
}
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
-}
-
void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
- // Normally, it's not safe to use register enum values directly with
- // addition to get the next register, but for VFP registers, the
- // sort order is guaranteed because they're all of the form D<n>.
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
- << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
}
void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 1037161..e9cd407 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -134,9 +134,8 @@ public:
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@@ -147,8 +146,6 @@ public:
raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
- void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d3a3d3a..25849ee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -167,6 +167,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case ARM::tBcc: return ARM::t2Bcc;
case ARM::tLDRpciASM: return ARM::t2LDRpci;
case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
}
}
@@ -181,6 +182,16 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i12.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
case ARM::fixup_arm_thumb_bcc: {
// Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
// low bit being an implied zero. There's an implied +4 offset for the
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 06eb4e5..ae11be8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -187,21 +187,37 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S31: case D31: return 31;
// Composite registers use the regnum of the first register in the list.
- case D1_D2: return 1;
- case D3_D5: return 3;
- case D5_D7: return 5;
- case D7_D9: return 7;
- case D9_D10: return 9;
- case D11_D12: return 11;
- case D13_D14: return 13;
- case D15_D16: return 15;
- case D17_D18: return 17;
- case D19_D20: return 19;
- case D21_D22: return 21;
- case D23_D24: return 23;
- case D25_D26: return 25;
- case D27_D28: return 27;
- case D29_D30: return 29;
+ /* Q0 */ case D0_D2: return 0;
+ case D1_D2: case D1_D3: return 1;
+ /* Q1 */ case D2_D4: return 2;
+ case D3_D4: case D3_D5: return 3;
+ /* Q2 */ case D4_D6: return 4;
+ case D5_D6: case D5_D7: return 5;
+ /* Q3 */ case D6_D8: return 6;
+ case D7_D8: case D7_D9: return 7;
+ /* Q4 */ case D8_D10: return 8;
+ case D9_D10: case D9_D11: return 9;
+ /* Q5 */ case D10_D12: return 10;
+ case D11_D12: case D11_D13: return 11;
+ /* Q6 */ case D12_D14: return 12;
+ case D13_D14: case D13_D15: return 13;
+ /* Q7 */ case D14_D16: return 14;
+ case D15_D16: case D15_D17: return 15;
+ /* Q8 */ case D16_D18: return 16;
+ case D17_D18: case D17_D19: return 17;
+ /* Q9 */ case D18_D20: return 18;
+ case D19_D20: case D19_D21: return 19;
+ /* Q10 */ case D20_D22: return 20;
+ case D21_D22: case D21_D23: return 21;
+ /* Q11 */ case D22_D24: return 22;
+ case D23_D24: case D23_D25: return 23;
+ /* Q12 */ case D24_D26: return 24;
+ case D25_D26: case D25_D27: return 25;
+ /* Q13 */ case D26_D28: return 26;
+ case D27_D28: case D27_D29: return 27;
+ /* Q14 */ case D28_D30: return 28;
+ case D29_D30: case D29_D31: return 29;
+ /* Q15 */
}
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 1606b92..ed27f9f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -151,13 +151,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index faf73ac..9d3da14 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -34,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -102,34 +102,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
+ // movw/movt r_type relocations always have a pair following them, and
+ // the r_length bits are used differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ Log2Size = 2;
return true;
}
}
void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = macho::RIT_ARM_Half;
@@ -313,10 +326,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
}
@@ -391,6 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt relocation.
+ uint32_t Value = 0;;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
Writer->addRelocation(Fragment->getParent(), MRE);
}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a89a663..edd73c2 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index adaccdd..8cf7cac 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 4d97626..27fce9b 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef THUMB1INSTRUCTIONINFO_H
#define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6b8bf0e..ef77bbd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59..6971842 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB1REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 6cb182a..2fe4b85 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a754649..1ae2ef1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef THUMB2INSTRUCTIONINFO_H
#define THUMB2INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
#include "ARMInstrInfo.h"
#include "Thumb2RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 6d210fe..29a87d0 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb2RegisterInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378a..6b397e8 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB2REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 5ee5f42..fb9d93b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
- unsigned WideOpc; // Wide opcode
- unsigned NarrowOpc1; // Narrow opcode to transform to
- unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
uint8_t Imm1Limit; // Limit of immediate field (bits)
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -189,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 80973b7..b6b209e 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2392,17 +2392,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
Out << ";\n";
- unsigned NumCases = SI.getNumCases();
// Skip the first item since that's the default case.
- for (unsigned i = 0; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
- BasicBlock* Succ = SI.getCaseSuccessor(i);
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
+ BasicBlock* Succ = i.getCaseSuccessor();
Out << " case ";
writeOperand(CaseVal);
Out << ":\n";
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+ if (Function::iterator(Succ) ==
+ llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 916f9ba..fac806e 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUFrameLowering.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 3d2b32d..55b3f72 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -31,14 +31,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
-// Used in getTargetNodeName() below
namespace {
- std::map<unsigned, const char *> node_names;
-
// Byte offset of the preferred slot (counted from the MSB)
int prefslotOffset(EVT VT) {
int retval=0;
@@ -481,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
}
-const char *
-SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- if (node_names.empty()) {
- node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
- node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
- node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
- node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
- node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
- node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
- node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
- node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
- node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
- node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
- node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
- node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
- node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
- node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
- node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
- node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
- node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
- node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
- }
-
- std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
-
- return ((i != node_names.end()) ? i->second : 0);
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+ case SPUISD::Hi: return "SPUISD::Hi";
+ case SPUISD::Lo: return "SPUISD::Lo";
+ case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+ case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+ case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+ case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+ case SPUISD::CALL: return "SPUISD::CALL";
+ case SPUISD::SHUFB: return "SPUISD::SHUFB";
+ case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+ case SPUISD::CNTB: return "SPUISD::CNTB";
+ case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+ case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+ case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+ case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+ case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+ case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+ case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+ case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+ case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+ case SPUISD::SELB: return "SPUISD::SELB";
+ case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+ case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+ case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+ }
}
//===----------------------------------------------------------------------===//
@@ -1216,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
// FIXME: we should be able to query the argument registers from
// tablegen generated code.
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
@@ -1230,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
};
// size of ArgRegs array
- unsigned NumArgRegs = 77;
+ const unsigned NumArgRegs = 77;
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index e28e2a4..25c5355 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -15,9 +15,9 @@
#ifndef SPU_ISELLOWERING_H
#define SPU_ISELLOWERING_H
+#include "SPU.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SPU.h"
namespace llvm {
namespace SPUISD {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index f0d21ad..85e5821 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -15,8 +15,8 @@
#define SPU_INSTRUCTIONINFO_H
#include "SPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SPUGenInstrInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 92983e1..1b2da5f 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
-#include "SPU.h"
#include "SPURegisterInfo.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index e43f5ad..21f6b25 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUTargetMachine.h"
+#include "SPU.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/DynamicLibrary.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index c179292..3e5d38c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
-class TargetFrameLowering;
/// SPUTargetMachine
///
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 76b5e9c..107c6cc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1090,10 +1090,10 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- unsigned NumCases = SI->getNumCases();
- for (unsigned i = 0; i < NumCases; ++i) {
- const ConstantInt* CaseVal = SI->getCaseValue(i);
- const BasicBlock *BB = SI->getCaseSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ const ConstantInt* CaseVal = i.getCaseValue();
+ const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
<< getOpName(BB) << ");";
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index bbefcaf..270c7a7 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -15,7 +15,6 @@
#ifndef TARGET_Hexagon_H
#define TARGET_Hexagon_H
-#include <cassert>
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 688b8e3..bf333b7 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -32,11 +32,11 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -46,8 +46,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 71787de..46c20e9 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -14,13 +14,13 @@
//===----------------------------------------------------------------------===//
#include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "Hexagon.h"
using namespace llvm;
Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 49c6cdf..e8a6924 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,13 +8,13 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonFrameLowering.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
-#include "HexagonFrameLowering.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 04ea4ed..57772a5 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,6 +27,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hwloops"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
@@ -43,8 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index ed4b840..d6da0d0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -28,17 +28,16 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "HexagonMachineFunctionInfo.h"
#include "llvm/Support/CommandLine.h"
+using namespace llvm;
const unsigned Hexagon_MAX_RET_SIZE = 64;
-using namespace llvm;
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
@@ -159,7 +158,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
};
@@ -182,10 +181,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
- static const unsigned RegList1[] = {
+ static const uint16_t RegList1[] = {
Hexagon::D1, Hexagon::D2
};
- static const unsigned RegList2[] = {
+ static const uint16_t RegList2[] = {
Hexagon::R1, Hexagon::R3
};
if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 5396486..4208bcb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -15,10 +15,10 @@
#ifndef Hexagon_ISELLOWERING_H
#define Hexagon_ISELLOWERING_H
+#include "Hexagon.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "Hexagon.h"
namespace llvm {
namespace HexagonISD {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 07872d4..3d7ace5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonRegisterInfo.h"
#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "Hexagon.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index eb088c3..7306870 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -14,10 +14,10 @@
#ifndef HexagonINSTRUCTIONINFO_H
#define HexagonINSTRUCTIONINFO_H
+#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "HexagonRegisterInfo.h"
#define GET_INSTRINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 06c732f..55cbc09 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -36,6 +36,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
@@ -45,16 +47,13 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-
-#include "llvm/Support/CommandLine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index c481270..2a9de92 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index fc65305..6cf727b 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -16,9 +16,10 @@
#define HexagonREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+
#define GET_REGINFO_HEADER
#include "HexagonGenRegisterInfo.inc"
-#include "llvm/MC/MachineLocation.h"
//
// We try not to hard code the reserved registers in our code,
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 319eab2..b9e6894 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -50,7 +50,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
///
HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- TargetOptions Options,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70bea56..0336965 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonISelLowering.h"
#include "HexagonSelectionDAGInfo.h"
#include "HexagonFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {
@@ -37,8 +37,9 @@ class HexagonTargetMachine : public LLVMTargetMachine {
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
- StringRef FS, TargetOptions Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const HexagonInstrInfo *getInstrInfo() const {
return &InstrInfo;
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index d3ce5a6..32cc709 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Target/TargetData.h"
@@ -18,9 +21,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/CommandLine.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetObjectFile.h"
-#include "HexagonTargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 74abc56..3cfa4fd 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===//
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides Cell Hexagon specific target descriptions.
+// This file provides Hexagon specific target descriptions.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 364841f..b18d23a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.h - Hexagon Target Descriptions ---------*- C++ -*-===//
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
namespace llvm {
class MCSubtargetInfo;
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
index 1114d99..73c7e01 100644
--- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
index 67be2bc..885be2d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/Makefile
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index adedf93..6b958c8 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -34,9 +34,9 @@ extern const MCInstrDesc MBlazeInsts[];
using namespace llvm;
-const unsigned UNSUPPORTED = -1;
+const uint16_t UNSUPPORTED = -1;
-static const unsigned mblazeBinary2Opcode[] = {
+static const uint16_t mblazeBinary2Opcode[] = {
MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 8be15bf..01e6578 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -15,11 +15,10 @@
#define MBLAZE_FRAMEINFO_H
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MBlazeSubtarget;
+class MBlazeSubtarget;
class MBlazeFrameLowering : public TargetFrameLowering {
protected:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23c8e13..9ef6bb6 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -657,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
MBlaze::R5, MBlaze::R6, MBlaze::R7,
MBlaze::R8, MBlaze::R9, MBlaze::R10
};
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 168694b..6a79fc1 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,11 +15,11 @@
#ifndef MBlazeISELLOWERING_H
#define MBlazeISELLOWERING_H
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
namespace llvm {
namespace MBlazeCC {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index a309d2b..5252147 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -15,9 +15,9 @@
#define MBLAZEINSTRUCTIONINFO_H
#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MBlazeRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index bb77ed4..7b97744 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 6801a1a..46f5207 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "mblaze-frame-info"
+#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
#include "MBlazeMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 5c07424..dd7de9b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index fd5de34..c03ba47 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index fe2a75c..04f339b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -14,8 +14,8 @@
#ifndef LLVM_TARGET_MSP430INSTRINFO_H
#define LLVM_TARGET_MSP430INSTRINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 297efd2..24151e2 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index f9ddfb3..51ec71a 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "msp430-reg-info"
+#include "MSP430RegisterInfo.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index e7bebbd..4d8792e 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -15,12 +15,11 @@
#define LLVM_TARGET_MSP430_SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "MSP430GenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class StringRef;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index af62e48..9f2eda1 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430TargetMachine.h"
+#include "MSP430.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d69570b..9d5a2f1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -92,25 +92,42 @@ public:
if (!Value)
return; // Doesn't change encoding.
+ // Where do we start in the object
unsigned Offset = Fixup.getOffset();
- // FIXME: The below code will not work across endian models
- // How many bytes/bits are we fixing up?
- unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
- uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+ // Number of bytes we need to fixup
+ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+ // Used to point to big endian bytes
+ unsigned FullSize;
+
+ switch ((unsigned)Kind) {
+ case Mips::fixup_Mips_16:
+ FullSize = 2;
+ break;
+ case Mips::fixup_Mips_64:
+ FullSize = 8;
+ break;
+ default:
+ FullSize = 4;
+ break;
+ }
// Grab current value, if any, from bits.
uint64_t CurVal = 0;
- for (unsigned i = 0; i != NumBytes; ++i)
- CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ }
+
+ uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
- // Write out the bytes back to the code/data bits.
- // First the unaffected bits and then the fixup.
+ // Write out the fixed up bytes back to the code/data bits.
for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
}
-}
+ }
unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index b039678..9ebb6d2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -49,9 +49,9 @@ public:
void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the instruction encoding in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, OS);
- Val >>= 8;
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
}
}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bacecf2..bafadc8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,8 +21,6 @@
namespace llvm {
class MipsTargetMachine;
class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 31b669a..dc8fbd0 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -26,28 +26,28 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
Iter->push_back(I);
}
-void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs);
- AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff));
+ GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
}
-void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs);
- AddInstr(SeqLs, Inst(ORi, Imm & 0xffff));
+ GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
}
-void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
unsigned Shamt = CountTrailingZeros_64(Imm);
GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
AddInstr(SeqLs, Inst(SLL, Shamt));
}
-void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size));
+ uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
// Do nothing if Imm is 0.
if (!MaskedImm)
@@ -122,7 +122,7 @@ void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
}
const MipsAnalyzeImmediate::InstSeq
-&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size,
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
bool LastInstrIsADDiu) {
this->Size = Size;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 24e6e5f..a094dda 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -25,7 +25,7 @@ namespace llvm {
/// Analyze - Get an instrucion sequence to load immediate Imm. The last
/// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
/// true;
- const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+ const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
private:
typedef SmallVector<InstSeq, 5> InstSeqLs;
@@ -34,18 +34,18 @@ namespace llvm {
/// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to
/// load immediate Imm
- void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to
/// load immediate Imm
- void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to
/// load immediate Imm
- void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// GetInstSeqLs - Get instrucion sequences to load immediate Imm.
- void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+ void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index aeabc0f..f2b842a 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
-#include "Mips.h"
#include "MipsAsmPrinter.h"
+#include "Mips.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MipsMCInstLower.h"
@@ -34,8 +34,6 @@
#include "llvm/Instructions.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 8502db2..473da7e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -22,9 +22,9 @@
namespace llvm {
class MCStreamer;
class MachineInstr;
-class raw_ostream;
class MachineBasicBlock;
class Module;
+class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index e83c64e..ebfbb4a 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsAnalyzeImmediate.h"
#include "MipsFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
#include "MCTargetDesc/MipsBaseInfo.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 782d203..536879e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -99,6 +99,8 @@ private:
return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
+ void ProcessFunctionAfterISel(MachineFunction &MF);
+ bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
void InitGlobalBaseReg(MachineFunction &MF);
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -181,10 +183,57 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
}
}
+bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E; ++U) {
+ MachineOperand &MO = U.getOperand();
+ MachineInstr *MI = MO.getParent();
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
+ InitGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ ReplaceUsesWithZeroReg(MRI, *I);
+}
+
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- InitGlobalBaseReg(MF);
+ ProcessFunctionAfterISel(MF);
return Ret;
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index dc894d9..ecde5b6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -18,13 +18,13 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -130,22 +130,32 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (HasMips64) {
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ }
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -185,8 +195,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -214,9 +222,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -246,11 +251,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
computeRegisterProperties();
@@ -559,21 +564,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+ SDValue SetCC = N->getOperand(0);
- if (Cond.getOpcode() != MipsISD::FPCmp)
+ if ((SetCC.getOpcode() != ISD::SETCC) ||
+ !SetCC.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
+ SDValue False = N->getOperand(2);
+ EVT FalseTy = False.getValueType();
- return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+ if (!FalseTy.isInteger())
+ return SDValue();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+ if (!CN || CN->getZExtValue())
+ return SDValue();
+
+ const DebugLoc DL = N->getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ SDValue True = N->getOperand(1);
+
+ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+
+ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
@@ -684,8 +705,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::SDIVREM:
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
- case ISD::SETCC:
- return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -708,6 +729,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -1475,6 +1497,18 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
Op.getDebugLoc());
}
+SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = CreateFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == MipsISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
+
SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
@@ -1841,13 +1875,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
static const unsigned IntRegsSize=4, FloatRegsSize=2;
- static const unsigned IntRegs[] = {
+ static const uint16_t IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
- static const unsigned F32Regs[] = {
+ static const uint16_t F32Regs[] = {
Mips::F12, Mips::F14
};
- static const unsigned F64Regs[] = {
+ static const uint16_t F64Regs[] = {
Mips::D6, Mips::D7
};
@@ -1926,10 +1960,10 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false; // CC must always match
}
-static const unsigned Mips64IntRegs[8] =
+static const uint16_t Mips64IntRegs[8] =
{Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
-static const unsigned Mips64DPRegs[8] =
+static const uint16_t Mips64DPRegs[8] =
{Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64};
@@ -1996,7 +2030,7 @@ AnalyzeMips64CallOperands(CCState &CCInfo,
static const unsigned O32IntRegsSize = 4;
-static const unsigned O32IntRegs[] = {
+static const uint16_t O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -2115,9 +2149,9 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
if (!IsRegLoc)
LocMemOffset = VA.getLocMemOffset();
else {
- const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
+ const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
VA.getLocReg());
- const unsigned *RegEnd = Mips64IntRegs + 8;
+ const uint16_t *RegEnd = Mips64IntRegs + 8;
// Copy double words to registers.
for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) {
@@ -2540,7 +2574,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
MachineFrameInfo *MFI, bool IsRegLoc,
SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
EVT PtrTy) {
- const unsigned *Reg = Mips64IntRegs + 8;
+ const uint16_t *Reg = Mips64IntRegs + 8;
int FOOffset; // Frame object offset from virtual frame pointer.
if (IsRegLoc) {
@@ -2709,7 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
unsigned NumOfRegs = IsO32 ? 4 : 8;
- const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
+ const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot.
const TargetRegisterClass *RC
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 621bbec..66f45cd 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -15,10 +15,10 @@
#ifndef MipsISELLOWERING_H
#define MipsISELLOWERING_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace MipsISD {
@@ -128,6 +128,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 10caf30..4be727d 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,9 +15,9 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MipsRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MipsGenInstrInfo.inc"
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index be65298..0d51298 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "MipsMCInstLower.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MipsMCInstLower.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index cbd5264..20bb338 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -14,11 +14,9 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
- class MCSymbol;
class MachineInstr;
class MachineFunction;
class Mangler;
@@ -38,7 +36,7 @@ public:
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerUnalignedLoadStore(const MachineInstr *MI,
- SmallVector<MCInst, 4>& MCInsts);
+ SmallVector<MCInst, 4>& MCInsts);
void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 57ff069..abb5404 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,10 +14,10 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
-#include <utility>
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <utility>
namespace llvm {
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e0ecba2..5cfda34 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -13,10 +13,10 @@
#define DEBUG_TYPE "mips-reg-info"
+#include "MipsRegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsSubtarget.h"
-#include "MipsRegisterInfo.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -83,12 +83,12 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- static const unsigned ReservedCPURegs[] = {
+ static const uint16_t ReservedCPURegs[] = {
Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
Mips::SP, Mips::FP, Mips::RA
};
- static const unsigned ReservedCPU64Regs[] = {
+ static const uint16_t ReservedCPU64Regs[] = {
Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
Mips::SP_64, Mips::FP_64, Mips::RA_64
};
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8806aaf..ad02231 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
#include "MipsTargetMachine.h"
+#include "Mips.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 19ae142..80c00e8 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -14,15 +14,15 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsSubtarget.h"
+#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
-#include "MipsFrameLowering.h"
+#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index 77a298d..a3e0f32 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,9 +17,9 @@
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
+#include "PTXMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "PTXMCTargetDesc.h"
namespace llvm {
namespace PTXStateSpace {
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 7d46cce..ffb92cb 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -1,4 +1,3 @@
-//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 58ac5f2..0b6ac7b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ptx-asm-printer"
-#include "PTX.h"
#include "PTXAsmPrinter.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e5d4edc..db1c953 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXISelLowering.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index fd20982..33220f4 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -18,8 +18,6 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
-class PTXSubtarget;
-class PTXTargetMachine;
namespace PTXISD {
enum NodeType {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 9d6cbf1..443cd54 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "ptx-instrinfo"
-#include "PTX.h"
#include "PTXInstrInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
index 74538e6..cc1cc71 100644
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXParamManager.h"
+#include "PTX.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
index 32342f7..92e7728 100644
--- a/lib/Target/PTX/PTXParamManager.h
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include <string>
namespace llvm {
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 3f087cd..b6ffd38 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 9305377..40835d0 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXTargetMachine.h"
+#include "PTX.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
@@ -26,6 +26,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -37,8 +38,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 02dad45..9c6eefe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 5dc1863..24a7178 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,14 +25,11 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
- class formatted_raw_ostream;
class JITCodeEmitter;
- class Target;
class MachineInstr;
class AsmPrinter;
class MCInst;
- class TargetMachine;
-
+
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 591ae02..4abb469 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/ELF.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;
namespace {
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 8efc9c1..9883c2e 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[
CCCustom<"CC_PPC_SVR4_Custom_Dummy">
]>;
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 6d612f7..b77a80b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const unsigned short VRRegNo[] = {
+static const uint16_t VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 95d0d64..d80a385 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -14,10 +14,10 @@
#ifndef PPCHAZRECS_H
#define PPCHAZRECS_H
+#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "PPCInstrInfo.h"
namespace llvm {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bfed7ba..85b5bc1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,11 @@
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,16 +29,11 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -1547,7 +1547,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1574,7 +1574,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1598,8 +1598,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const unsigned *GetFPR() {
- static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1780,13 +1780,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const unsigned GPArgRegs[] = {
+ static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const unsigned FPArgRegs[] = {
+ static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1879,18 +1879,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -1901,7 +1901,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -2769,6 +2769,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
(CallConv == CallingConv::Fast &&
getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -3141,17 +3147,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -3159,7 +3165,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3534e9c..2e046c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
namespace PPCISD {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 02bffed..78f3596 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -64,13 +64,7 @@ let Defs = [LR8] in
PPC970_Unit_BRU;
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
@@ -90,13 +84,7 @@ let isCall = 1, PPC970_Unit = 7,
// ELF 64 ABI Calls = Darwin ABI Calls
// Used to define BL8_ELF and BLA8_ELF
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index e5f171d..7d49aa1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -15,8 +15,8 @@
#define POWERPC_INSTRUCTIONINFO_H
#include "PPC.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e234012..939b71a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -438,13 +438,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -463,13 +457,7 @@ let isCall = 1, PPC970_Unit = 7,
}
// SVR4 ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 306cc1f..2976f01 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCRegisterInfo.h"
#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "llvm/CallingConv.h"
@@ -100,104 +100,20 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
const uint16_t*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const uint16_t Darwin32_CalleeSavedRegs[] = {
- PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::LR, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const uint16_t SVR4_CalleeSavedRegs[] = {
- PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- 0
- };
- // 64-bit Darwin calling convention.
- static const uint16_t Darwin64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::LR8, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const uint16_t SVR4_64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+ CSR_Darwin32_SaveList;
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+ return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
- 0
- };
-
+const unsigned*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
- Darwin32_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+ CSR_Darwin32_RegMask;
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 6ce90bc..b1e6a72 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -42,6 +42,7 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index da20274..ba9c779 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "PPC.h"
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6dd11c9..7da2b0c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -24,8 +24,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 1423b1e..9a729bd 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
/// registers that correspond to it.
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
unsigned &OddReg) {
- static const unsigned EvenHalvesOfPairs[] = {
+ static const uint16_t EvenHalvesOfPairs[] = {
SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
};
- static const unsigned OddHalvesOfPairs[] = {
+ static const uint16_t OddHalvesOfPairs[] = {
SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
};
- static const unsigned DoubleRegsInOrder[] = {
+ static const uint16_t DoubleRegsInOrder[] = {
SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
};
- for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
if (DoubleRegsInOrder[i] == DoubleReg) {
EvenReg = EvenHalvesOfPairs[i];
OddReg = OddHalvesOfPairs[i];
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index a6b63fb..ee12633 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -50,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
//Try to get first reg
@@ -301,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
- const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 4a7c479..f483c96 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -15,8 +15,8 @@
#ifndef SPARC_ISELLOWERING_H
#define SPARC_ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
#include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace SPISD {
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 4932531..204f698 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -14,8 +14,8 @@
#ifndef SPARCINSTRUCTIONINFO_H
#define SPARCINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index c392fcc..6357468 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcRegisterInfo.h"
+#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 80a3be6..6f31356 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcTargetMachine.h"
+#include "Sparc.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d91830f..9e88472 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -67,11 +67,11 @@ private:
MCStreamer &Out);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
- /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
- /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
- /// or %es:(%edi) in 32bit mode.
+ /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
bool is64BitMode() const {
@@ -468,7 +468,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ return Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
@@ -838,6 +839,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc IndexLoc;
if (getLexer().is(AsmToken::Percent)) {
SMLoc StartLoc, EndLoc;
@@ -851,6 +853,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
+ IndexLoc = Parser.getTok().getLoc();
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
@@ -876,8 +879,10 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal))
+ if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ Error(Loc, "expected scale expression");
return 0;
+ }
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -910,6 +915,23 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
+ // If we have both a base register and an index register make sure they are
+ // both 64-bit or 32-bit registers.
+ if (BaseReg != 0 && IndexReg != 0) {
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ IndexReg != X86::RIZ) {
+ Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+ return 0;
+ }
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ IndexReg != X86::EIZ){
+ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+ return 0;
+ }
+ }
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b0e66f0..fbd81d2 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -312,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (consumeByte(insn, &byte))
return -1;
+
+ /*
+ * If the first byte is a LOCK prefix, break and let it be disassembled
+ * as a lock "instruction" by creating an <MCInst #xxxx LOCK_PREFIX>.
+ * FIXME: there is currently no way to get the disassembler to print the
+ * lock prefix if it is not the first byte.
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
switch (byte) {
case 0xf0: /* LOCK */
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 30a847f..f532019 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
- SmallVector<unsigned, 8> ShuffleMask;
+ SmallVector<int, 8> ShuffleMask;
const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
switch (MI->getOpcode()) {
@@ -500,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
if (Src1Name == Src2Name) {
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] >= (int)e) // From second mask.
ShuffleMask[i] -= e;
}
}
@@ -518,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// Otherwise, it must come from src1 or src2. Print the span of elements
// that comes from this src.
- bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
const char *SrcName = isSrc1 ? Src1Name : Src2Name;
OS << (SrcName ? SrcName : "mem") << '[';
bool IsFirst = true;
while (i != e &&
(int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
if (!IsFirst)
OS << ',';
else
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9ccbf1c..3f770f7 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 37727b6..80990e5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -46,6 +46,11 @@ public:
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ bool is32BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86_MC::getX86RegNum(MO.getReg());
}
@@ -154,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// Is32BitMemOperand - Return true if the specified instruction with a memory
-/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
-/// memory operand. Op specifies the operand # of the memoperand.
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -169,6 +173,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
/// StartsWithGlobalOffsetTable - Check if this expression starts with
/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
@@ -817,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EmitByte(0xF3, CurByte, OS);
// Emit the address size opcode prefix as needed.
- if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (is64BitMode()) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else if (is32BitMode()) {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ } else {
+ need_address_override = false;
+ }
+
+ if (need_address_override)
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index a581993..624e56f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,3 +922,22 @@ _test2: ## @test2
The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
+
+[UNSAFE FP]
+
+void foo(double, double, double);
+void norm(double x, double y, double z) {
+ double scale = __builtin_sqrt(x*x + y*y + z*z);
+ foo(x/scale, y/scale, z/scale);
+}
+
+We currently generate an sqrtsd and 3 divsd instructions. This is bad: fp div is
+slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first
+and emit 3 mulsd in place of the divs. This can be done as a target-independent
+transform.
+
+If we're dealing with floats instead of doubles we could even replace the sqrtss
+and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
+cost of reduced accuracy.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f4b85ae..32c722a 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -20,7 +20,7 @@
namespace llvm {
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// Defaults to copying the dest value.
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
@@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -66,8 +64,7 @@ void DecodeMOVLHPSMask(unsigned NElts,
/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodePSHUFMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -83,8 +80,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm,
}
}
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -95,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm,
}
}
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -110,8 +105,7 @@ void DecodePSHUFLWMask(unsigned Imm,
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -136,7 +130,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm,
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -156,7 +150,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -174,7 +168,7 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+ SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 877c9bd..5b8c6ef 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -24,47 +24,41 @@
namespace llvm {
enum {
- SM_SentinelZero = ~0U
+ SM_SentinelZero = -1
};
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// and punpckh*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// and punpckl*. VT indicates the type of the vector allowing it to handle
/// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 81e9422..ecc7b59 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,8 +24,6 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
-class MachineCodeEmitter;
-class Target;
class X86TargetMachine;
/// createX86ISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 268cbf4..f1cedf3 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 1058df5..a6ed9ba 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -24,11 +24,7 @@
namespace llvm {
-class MachineJumpTableInfo;
-class MCContext;
-class MCInst;
class MCStreamer;
-class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 63c08f1..0cec95a 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -14,9 +14,9 @@
#ifndef X86COFF_MACHINEMODULEINFO_H
#define X86COFF_MACHINEMODULEINFO_H
+#include "X86MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f90764e..3d63b7e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1779,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 32de194..936df27 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,6 +26,7 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -37,7 +38,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -570,8 +570,8 @@ void FPS::finishBlockStack() {
namespace {
struct TableEntry {
- unsigned from;
- unsigned to;
+ uint16_t from;
+ uint16_t to;
bool operator<(const TableEntry &TE) const { return from < TE.from; }
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index aa508b8..9405c2f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -21,7 +21,6 @@
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -1654,7 +1654,7 @@ enum AtomicSz {
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cae9aad..88f3829 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1927,17 +1927,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
- static const unsigned GPR64ArgRegsWin64[] = {
+ static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned GPR64ArgRegs64Bit[] = {
+ static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegs64Bit[] = {
+ static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs;
+ const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
@@ -2326,7 +2326,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -2910,7 +2910,7 @@ static bool isTargetShuffle(unsigned Opcode) {
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SelectionDAG &DAG) {
+ SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
@@ -2921,7 +2921,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PSHUFD:
@@ -2933,7 +2934,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, SDValue V2, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGN:
@@ -3712,6 +3714,8 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
unsigned NumOps = VT.getVectorNumElements();
+ if (VT.getSizeInBits() == 256)
+ return false;
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
@@ -4342,9 +4346,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
+/// getTargetShuffleMask - Decode the shuffle mask of the target-specific
+/// shuffle node N into Mask. Returns false for opcodes not yet handled here.
+/// IsUnary is set to true for PSHUFD, VPERMILP, PSHUFHW and PSHUFLW only.
+static bool getTargetShuffleMask(SDNode *N, EVT VT,
+ SmallVectorImpl<int> &Mask, bool &IsUnary) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue ImmN;
+
+ IsUnary = false;
+ switch(N->getOpcode()) {
+ case X86ISD::SHUFP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, Mask);
+ break;
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, Mask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, Mask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, Mask);
+ break;
+ case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // Element 0 always comes from the first element of the second source
+ // (mask value NumElems); this is why MOVSS and MOVSD are used at all.
+ // Elements 1..NumElems-1 keep their positions from the first source.
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i) {
+ Mask.push_back(i);
+ }
+ break;
+ }
+ case X86ISD::VPERM2X128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ // Mask decoding is not yet implemented for these opcodes.
+ return false;
+ default: llvm_unreachable("unknown target shuffle node");
+ }
+
+ return true;
+}
+
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -4355,89 +4431,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
- Index = SV->getMaskElt(Index);
+ int Elt = SV->getMaskElt(Index);
- if (Index < 0)
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
- SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0)
- : SV->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<unsigned, 16> ShuffleMask;
+ SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
+ bool IsUnary;
- switch(Opcode) {
- case X86ISD::SHUFP:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::UNPCKH:
- DecodeUNPCKHMask(VT, ShuffleMask);
- break;
- case X86ISD::UNPCKL:
- DecodeUNPCKLMask(VT, ShuffleMask);
- break;
- case X86ISD::MOVHLPS:
- DecodeMOVHLPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::MOVLHPS:
- DecodeMOVLHPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PSHUFD:
- case X86ISD::VPERMILP:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFHW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFLW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVSS:
- case X86ISD::MOVSD: {
- // The index 0 always comes from the first element of the second source,
- // this is why MOVSS and MOVSD are used in the first place. The other
- // elements come from the other positions of the first source vector.
- unsigned OpNum = (Index == 0) ? 1 : 0;
- return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
- Depth+1);
- }
- case X86ISD::VPERM2X128:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVDDUP:
- case X86ISD::MOVLHPD:
- case X86ISD::MOVLPD:
- case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
- return SDValue(); // Not yet implemented.
- default: llvm_unreachable("unknown target shuffle node");
- }
-
- Index = ShuffleMask[Index];
- if (Index < 0)
+ if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
+ return SDValue();
+
+ int Elt = ShuffleMask[Index];
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0)
+ SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
: N->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -4453,7 +4474,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
- : DAG.getUNDEF(VT.getVectorElementType());
+ : DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
@@ -4465,38 +4486,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// shuffle operation which come consecutively from a zero. The
/// search can start in two different directions, from left or right.
static
-unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
- int i = 0;
-
- while (i < NumElems) {
+ unsigned i;
+ for (i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
- SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!(Elt.getNode() &&
(Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
break;
- ++i;
}
return i;
}
-/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
-/// MaskE correspond consecutively to elements from one of the vector operands,
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
/// starting from its index OpIdx. Also tell OpNum which source vector operand.
static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
- int OpIdx, int NumElems, unsigned &OpNum) {
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+ unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+ unsigned NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
int Idx = SVOp->getMaskElt(i);
// Ignore undef indices
if (Idx < 0)
continue;
- if (Idx < NumElems)
+ if (Idx < (int)NumElems)
SeenV1 = true;
else
SeenV2 = true;
@@ -4531,7 +4551,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
0, // Mask Start Index
- NumElems-NumZeros-1, // Mask End Index
+ NumElems-NumZeros, // Mask End Index (exclusive)
NumZeros, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4564,7 +4584,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
NumZeros, // Mask Start Index
- NumElems-1, // Mask End Index
+ NumElems, // Mask End Index (exclusive)
0, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -6080,88 +6100,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
return false;
}
-/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
-/// a vector extract, and if both can be later optimized into a single load.
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
-/// here because otherwise a target specific shuffle node is going to be
-/// emitted for this shuffle, and the optimization not done.
-/// FIXME: This is probably not the best approach, but fix the problem
-/// until the right path is decided.
-static
-bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = V.getValueType();
- ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
-
- // Be sure that the vector shuffle is present in a pattern like this:
- // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
- if (!V.hasOneUse())
- return false;
-
- SDNode *N = *V.getNode()->use_begin();
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return false;
-
- SDValue EltNo = N->getOperand(1);
- if (!isa<ConstantSDNode>(EltNo))
- return false;
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- bool HasShuffleIntoBitcast = false;
- if (V.getOpcode() == ISD::BITCAST) {
- EVT SrcVT = V.getOperand(0).getValueType();
- if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
- return false;
- V = V.getOperand(0);
- HasShuffleIntoBitcast = true;
- }
-
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
- V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
-
- // If we are accessing the upper part of a YMM register
- // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of
- // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point
- // because the legalization of N did not happen yet.
- if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256)
- return false;
-
- // Skip one more bit_convert if necessary
- if (V.getOpcode() == ISD::BITCAST) {
- if (!V.hasOneUse())
- return false;
- V = V.getOperand(0);
- }
-
- if (!ISD::isNormalLoad(V.getNode()))
- return false;
-
- // Is the original load suitable?
- LoadSDNode *LN0 = cast<LoadSDNode>(V);
-
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
- return false;
-
- if (!HasShuffleIntoBitcast)
- return true;
-
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return false;
-
- return true;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6282,12 +6220,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (SVOp->isSplat()) {
unsigned NumElem = VT.getVectorNumElements();
int Size = VT.getSizeInBits();
- // Special case, this is the only place now where it's allowed to return
- // a vector_shuffle operation without using a target specific node, because
- // *hopefully* it will be optimized away by the dag combiner. FIXME: should
- // this be moved to DAGCombine instead?
- if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
- return Op;
// Use vbroadcast whenever the splat comes from a foldable load
SDValue LD = isVectorBroadcast(Op, Subtarget);
@@ -13005,11 +12937,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been custom lowered so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+                                        TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue InVec = N->getOperand(0);
+  SDValue EltNo = N->getOperand(1);
+
+  if (!isa<ConstantSDNode>(EltNo))
+    return SDValue();
+
+  EVT VT = InVec.getValueType();
+
+  bool HasShuffleIntoBitcast = false;
+  if (InVec.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!InVec.hasOneUse())
+      return SDValue();
+    EVT BCVT = InVec.getOperand(0).getValueType();
+    if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+      return SDValue();
+    InVec = InVec.getOperand(0);
+    HasShuffleIntoBitcast = true;
+  }
+
+  if (!isTargetShuffle(InVec.getOpcode()))
+    return SDValue();
+
+  // Don't duplicate a load with other uses.
+  if (!InVec.hasOneUse())
+    return SDValue();
+
+  SmallVector<int, 16> ShuffleMask;
+  bool UnaryShuffle;
+  if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+    return SDValue();
+
+  // Select the input vector, treating an out of range extract index as -1.
+  unsigned NumElems = VT.getVectorNumElements();
+  uint64_t Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  int Idx = (Elt >= NumElems) ? -1 : ShuffleMask[Elt];
+  SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+                                         : InVec.getOperand(1);
+
+  // If inputs to shuffle are the same for both ops, then allow 2 uses
+  unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+  if (LdNode.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+      return SDValue();
+
+    AllowedUses = 1; // only allow 1 load use if we have a bitcast
+    LdNode = LdNode.getOperand(0);
+  }
+
+  if (!ISD::isNormalLoad(LdNode.getNode()))
+    return SDValue();
+
+  LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+  if (!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+    return SDValue();
+
+  if (HasShuffleIntoBitcast) {
+    // If there's a bitcast before the shuffle, check if the load type and
+    // alignment is valid.
+    unsigned Align = LN0->getAlignment();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    unsigned NewAlign = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+      return SDValue();
+  }
+
+  // All checks match so transform back to vector_shuffle so that DAG combiner
+  // can finish the job
+  DebugLoc dl = N->getDebugLoc();
+
+  // Build the shuffle in its own type; a unary shuffle gets an undef operand.
+  SDValue Shuffle = UnaryShuffle ? DAG.getUNDEF(InVec.getValueType())
+                                 : InVec.getOperand(1);
+  Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl, InVec.getOperand(0),
+                                 Shuffle, &ShuffleMask[0]);
+  Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+                     EltNo);
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+ if (NewOp.getNode())
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
@@ -13070,6 +13100,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@@ -13093,6 +13124,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+
+
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -14897,7 +14930,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index ac49232..42a5014 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
+ [(X86ehret GR32:$addr)], IIC_RET>;
}
@@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
+ [(X86ehret GR64:$addr)], IIC_RET>;
}
@@ -193,7 +193,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -202,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)]>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -218,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -226,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
+ "", [(set GR64:$dst, i64immZExt32:$src)],
+ IIC_ALU_NONMEM>;
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -236,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
// X86CodeEmitter.
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>,
OpSize;
def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
} // isCodeGenOnly
@@ -297,32 +302,32 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
//
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
}
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP;
let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP;
let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP;
//===----------------------------------------------------------------------===//
@@ -571,7 +576,7 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -591,72 +596,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, OpSize, LOCK;
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
}
@@ -673,29 +678,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
+ "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
+ "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
+ "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
+ "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
+ "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
+ "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
+ "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
+ "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
}
// Atomic compare and swap.
@@ -704,42 +709,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
+ [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
isCodeGenOnly = 1 in
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
"lock\n\t"
"cmpxchg16b\t$ptr",
- [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
Requires<[HasCmpxchg16b]>;
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
"cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
}
let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
"lock\n\t"
"cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+ [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
}
let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
"lock\n\t"
"cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
}
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
"cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
}
// Atomic exchange and add
@@ -747,22 +752,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
"lock\n\t"
"xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+ IIC_XADD_LOCK_MEM8>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
"lock\n\t"
"xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, OpSize, LOCK;
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
"lock\n\t"
"xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
"xadd{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
}
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4f9f089..ae3ed1b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
// Like 'load', but always requires 256-bit vector alignment.
def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5a479f0..307c96b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -82,6 +82,12 @@ enum {
TB_FOLDED_STORE = 1 << 19
};
+struct X86OpTblEntry { // One row of the static OpTbl* opcode tables below.
+ uint16_t RegOp; // First opcode of the pair (e.g. X86::ADC32ri).
+ uint16_t MemOp; // Its paired memory opcode (e.g. X86::ADC32mi).
+ uint32_t Flags; // Per-entry TB_* bits; OR'ed with table-wide TB_* flags for AddTableEntry.
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -91,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -259,16 +265,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -555,16 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -1108,9 +1114,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
@@ -3627,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
@@ -3667,7 +3673,7 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
};
-static const unsigned ReplaceableInstrsAVX2[][3] = {
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
@@ -3688,14 +3694,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
-static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) {
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
return ReplaceableInstrsAVX2[i];
@@ -3718,7 +3724,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
if (!table) { // try the other table
assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
"256-bit vector operations only available in AVX2");
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d065d2d..b23d756 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -14,10 +14,10 @@
#ifndef X86INSTRUCTIONINFO_H
#define X86INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f585b47..dd7cf50 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1856,19 +1856,19 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
// shld/shrd op,op -> shld op, op, CL
-def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
-def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
-
-def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
-def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
/* FIXME: This is disabled because the asm matcher is currently incapable of
* matching a fixed immediate like $1.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c6d1d19..df42627 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -641,7 +641,7 @@ let Predicates = [HasAVX] in {
(VMOVSDrr (v2i64 (V_SET0)),
(EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
-// Extract and store.
+ // Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSSmr addr:$dst,
@@ -2306,7 +2306,7 @@ let Defs = [EFLAGS] in {
"comisd", SSEPackedDouble>, TB, OpSize;
} // Defs = [EFLAGS]
-// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Intrinsic Int, string asm, string asm_alt,
Domain d> {
@@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
}
let Predicates = [HasAVX] in {
-def : Pat<(v4i64 (X86vzload addr:$src)),
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}
//===---------------------------------------------------------------------===//
@@ -7307,6 +7309,24 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
[]>, VEX;
}
+// Extract and store.
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+}
+
+// AVX1 patterns
let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
@@ -7314,6 +7334,31 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}
//===----------------------------------------------------------------------===//
@@ -7711,7 +7756,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>, VEX_4V;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
@@ -7756,6 +7801,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
}
//===----------------------------------------------------------------------===//
@@ -7791,34 +7849,6 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}
-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-}
-
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 8843848..bddba6c 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -45,17 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
-def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB;
-def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
Requires<[In64BitMode]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index a7a5c56..b578e8d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -26,7 +27,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Type.h"
using namespace llvm;
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 93e2744..b56025f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
#include "X86RegisterInfo.h"
+#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d6d0149..17f4efd 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -114,6 +114,9 @@ def IIC_MOVZX : InstrItinClass;
def IIC_MOVZX_R16_R8 : InstrItinClass;
def IIC_MOVZX_R16_M8 : InstrItinClass;
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
// SSE scalar/parallel binary operations
def IIC_SSE_ALU_F32S_RR : InstrItinClass;
def IIC_SSE_ALU_F32S_RM : InstrItinClass;
@@ -250,6 +253,14 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index e8cf72a..77d4e56 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -144,6 +144,9 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
// SSE binary operations
// arithmetic fp scalar
InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
@@ -289,6 +292,14 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >
-]>;
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_LOCK_MEM8, [InstrStage<3, [Port0, Port1]>] >
+ ]>;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a36d0d8..7fd832b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,9 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/CallingConv.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 143caba..8e935af 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -28,7 +28,6 @@
namespace llvm {
-class formatted_raw_ostream;
class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index ceb7a4a..a02a368 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -15,7 +15,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
- class X86TargetMachine;
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 4d8ef74..50fda58 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
#include "XCoreFrameLowering.h"
+#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "llvm/Function.h"
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index c2d2a5d..593cebc 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1152,7 +1152,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
if (isVarArg) {
/* Argument registers */
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index f5a6822..5cd3e67 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -15,9 +15,9 @@
#ifndef XCOREISELLOWERING_H
#define XCOREISELLOWERING_H
+#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "XCore.h"
namespace llvm {
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index f930623..0a3008d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMachineFunctionInfo.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e47d212..42eeed8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -14,8 +14,8 @@
#ifndef XCOREINSTRUCTIONINFO_H
#define XCOREINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 8730282..f3b4b4c 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -14,6 +14,8 @@
#include "XCoreRegisterInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,8 +26,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@@ -54,20 +54,6 @@ static inline bool isImmU16(unsigned val) {
return val < (1 << 16);
}
-static const unsigned XCore_ArgRegs[] = {
- XCore::R0, XCore::R1, XCore::R2, XCore::R3
-};
-
-const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
-{
- return XCore_ArgRegs;
-}
-
-unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
-{
- return array_lengthof(XCore_ArgRegs);
-}
-
bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.getMMI().hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index ab6ce56..7391cfd 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -62,15 +62,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs(const MachineFunction *MF = 0);
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs(const MachineFunction *MF = 0);
-
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
};
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2c174f4..2546681 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "XCoreFrameLowering.h"
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {